Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / requests / packages / urllib3 / response.py @ 564

History | View | Annotate | Download (17.7 KB)

1
from __future__ import absolute_import
2
from contextlib import contextmanager
3
import zlib
4
import io
5
from socket import timeout as SocketTimeout
6
from socket import error as SocketError
7

    
8
from ._collections import HTTPHeaderDict
9
from .exceptions import (
10
    ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked
11
)
12
from .packages.six import string_types as basestring, binary_type, PY3
13
from .packages.six.moves import http_client as httplib
14
from .connection import HTTPException, BaseSSLError
15
from .util.response import is_fp_closed, is_response_to_head
16

    
17

    
18
class DeflateDecoder(object):
    """
    Incremental decoder for ``deflate`` encoded bodies.

    The input is first fed to a default ``zlib.decompressobj()`` (zlib
    wrapped stream).  If that raises ``zlib.error`` before any output has
    been produced, all input received so far is replayed through a raw
    deflate decompressor (``-zlib.MAX_WBITS``) instead.
    """

    def __init__(self):
        # True until we know which of the two formats the stream uses.
        self._first_try = True
        # Input seen so far, kept only while the format is still undecided
        # so that it can be replayed through the fallback decompressor.
        self._data = b''
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        # Only invoked for attributes missing on the wrapper itself;
        # forward them (e.g. ``flush``) to the active zlib object.
        return getattr(self._obj, name)

    def decompress(self, data):
        """
        Decompress ``data`` and return whatever output is available.

        :param data:
            The next piece of the compressed stream. Empty input is
            returned unchanged.

        :raises zlib.error:
            If the data cannot be decoded by the decompressor in use.
        """
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
        except zlib.error:
            # Not a zlib-wrapped stream: retry everything as raw deflate.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None

        if decompressed:
            # The first attempt produced output, so the replay buffer is no
            # longer needed; drop it instead of accumulating the whole
            # compressed body in memory.
            self._first_try = False
            self._data = None
        return decompressed
45

    
46

    
47
class GzipDecoder(object):
    """
    Incremental decoder for ``gzip`` encoded bodies, backed by a zlib
    decompression object.
    """

    def __init__(self):
        # Per the zlib documentation, adding 16 to the window size selects
        # the gzip container format.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def __getattr__(self, name):
        # Only invoked for attributes missing on the wrapper itself;
        # forward them (e.g. ``flush``) to the zlib object.
        return getattr(self._obj, name)

    def decompress(self, data):
        """
        Decompress ``data`` and return the available output.

        Empty input is returned unchanged without touching the
        decompressor.
        """
        if not data:
            return data
        return self._obj.decompress(data)
59

    
60

    
61
def _get_decoder(mode):
    """
    Return a fresh decoder object for the given content-encoding name.

    :param mode:
        ``'gzip'`` selects :class:`GzipDecoder`; any other value yields a
        :class:`DeflateDecoder`.
    """
    if mode == 'gzip':
        return GzipDecoder()

    return DeflateDecoder()
66

    
67

    
68
class HTTPResponse(io.IOBase):
69
    """
70
    HTTP Response container.
71

72
    Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
73
    loaded and decoded on-demand when the ``data`` property is accessed.  This
74
    class is also compatible with the Python standard library's :mod:`io`
75
    module, and can hence be treated as a readable object in the context of that
76
    framework.
77

78
    Extra parameters for behaviour not present in httplib.HTTPResponse:
79

80
    :param preload_content:
81
        If True, the response's body will be preloaded during construction.
82

83
    :param decode_content:
84
        If True, attempts to decode specific content-encoding's based on headers
85
        (like 'gzip' and 'deflate') will be skipped and raw data will be used
86
        instead.
87

88
    :param original_response:
89
        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
90
        object, it's convenient to include the original for debug purposes. It's
91
        otherwise unused.
92
    """
93

    
94
    CONTENT_DECODERS = ['gzip', 'deflate']
95
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]
96

    
97
    def __init__(self, body='', headers=None, status=0, version=0, reason=None,
98
                 strict=0, preload_content=True, decode_content=True,
99
                 original_response=None, pool=None, connection=None):
100

    
101
        if isinstance(headers, HTTPHeaderDict):
102
            self.headers = headers
103
        else:
104
            self.headers = HTTPHeaderDict(headers)
105
        self.status = status
106
        self.version = version
107
        self.reason = reason
108
        self.strict = strict
109
        self.decode_content = decode_content
110

    
111
        self._decoder = None
112
        self._body = None
113
        self._fp = None
114
        self._original_response = original_response
115
        self._fp_bytes_read = 0
116

    
117
        if body and isinstance(body, (basestring, binary_type)):
118
            self._body = body
119

    
120
        self._pool = pool
121
        self._connection = connection
122

    
123
        if hasattr(body, 'read'):
124
            self._fp = body
125

    
126
        # Are we using the chunked-style of transfer encoding?
127
        self.chunked = False
128
        self.chunk_left = None
129
        tr_enc = self.headers.get('transfer-encoding', '').lower()
130
        # Don't incur the penalty of creating a list and then discarding it
131
        encodings = (enc.strip() for enc in tr_enc.split(","))
132
        if "chunked" in encodings:
133
            self.chunked = True
134

    
135
        # If requested, preload the body.
136
        if preload_content and not self._body:
137
            self._body = self.read(decode_content=decode_content)
138

    
139
    def get_redirect_location(self):
140
        """
141
        Should we redirect and where to?
142

143
        :returns: Truthy redirect location string if we got a redirect status
144
            code and valid location. ``None`` if redirect status and no
145
            location. ``False`` if not a redirect status code.
146
        """
147
        if self.status in self.REDIRECT_STATUSES:
148
            return self.headers.get('location')
149

    
150
        return False
151

    
152
    def release_conn(self):
153
        if not self._pool or not self._connection:
154
            return
155

    
156
        self._pool._put_conn(self._connection)
157
        self._connection = None
158

    
159
    @property
160
    def data(self):
161
        # For backwords-compat with earlier urllib3 0.4 and earlier.
162
        if self._body:
163
            return self._body
164

    
165
        if self._fp:
166
            return self.read(cache_content=True)
167

    
168
    def tell(self):
169
        """
170
        Obtain the number of bytes pulled over the wire so far. May differ from
171
        the amount of content returned by :meth:``HTTPResponse.read`` if bytes
172
        are encoded on the wire (e.g, compressed).
173
        """
174
        return self._fp_bytes_read
175

    
176
    def _init_decoder(self):
177
        """
178
        Set-up the _decoder attribute if necessar.
179
        """
180
        # Note: content-encoding value should be case-insensitive, per RFC 7230
181
        # Section 3.2
182
        content_encoding = self.headers.get('content-encoding', '').lower()
183
        if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
184
            self._decoder = _get_decoder(content_encoding)
185

    
186
    def _decode(self, data, decode_content, flush_decoder):
187
        """
188
        Decode the data passed in and potentially flush the decoder.
189
        """
190
        try:
191
            if decode_content and self._decoder:
192
                data = self._decoder.decompress(data)
193
        except (IOError, zlib.error) as e:
194
            content_encoding = self.headers.get('content-encoding', '').lower()
195
            raise DecodeError(
196
                "Received response with content-encoding: %s, but "
197
                "failed to decode it." % content_encoding, e)
198

    
199
        if flush_decoder and decode_content:
200
            data += self._flush_decoder()
201

    
202
        return data
203

    
204
    def _flush_decoder(self):
205
        """
206
        Flushes the decoder. Should only be called if the decoder is actually
207
        being used.
208
        """
209
        if self._decoder:
210
            buf = self._decoder.decompress(b'')
211
            return buf + self._decoder.flush()
212

    
213
        return b''
214

    
215
    @contextmanager
216
    def _error_catcher(self):
217
        """
218
        Catch low-level python exceptions, instead re-raising urllib3
219
        variants, so that low-level exceptions are not leaked in the
220
        high-level api.
221

222
        On exit, release the connection back to the pool.
223
        """
224
        try:
225
            try:
226
                yield
227

    
228
            except SocketTimeout:
229
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
230
                # there is yet no clean way to get at it from this context.
231
                raise ReadTimeoutError(self._pool, None, 'Read timed out.')
232

    
233
            except BaseSSLError as e:
234
                # FIXME: Is there a better way to differentiate between SSLErrors?
235
                if 'read operation timed out' not in str(e):  # Defensive:
236
                    # This shouldn't happen but just in case we're missing an edge
237
                    # case, let's avoid swallowing SSL errors.
238
                    raise
239

    
240
                raise ReadTimeoutError(self._pool, None, 'Read timed out.')
241

    
242
            except (HTTPException, SocketError) as e:
243
                # This includes IncompleteRead.
244
                raise ProtocolError('Connection broken: %r' % e, e)
245

    
246
        except Exception:
247
            # The response may not be closed but we're not going to use it anymore
248
            # so close it now to ensure that the connection is released back to the pool.
249
            if self._original_response and not self._original_response.isclosed():
250
                self._original_response.close()
251

    
252
            # Closing the response may not actually be sufficient to close
253
            # everything, so if we have a hold of the connection close that
254
            # too.
255
            if self._connection is not None:
256
                self._connection.close()
257

    
258
            raise
259
        finally:
260
            if self._original_response and self._original_response.isclosed():
261
                self.release_conn()
262

    
263
    def read(self, amt=None, decode_content=None, cache_content=False):
264
        """
265
        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
266
        parameters: ``decode_content`` and ``cache_content``.
267

268
        :param amt:
269
            How much of the content to read. If specified, caching is skipped
270
            because it doesn't make sense to cache partial content as the full
271
            response.
272

273
        :param decode_content:
274
            If True, will attempt to decode the body based on the
275
            'content-encoding' header.
276

277
        :param cache_content:
278
            If True, will save the returned data such that the same result is
279
            returned despite of the state of the underlying file object. This
280
            is useful if you want the ``.data`` property to continue working
281
            after having ``.read()`` the file object. (Overridden if ``amt`` is
282
            set.)
283
        """
284
        self._init_decoder()
285
        if decode_content is None:
286
            decode_content = self.decode_content
287

    
288
        if self._fp is None:
289
            return
290

    
291
        flush_decoder = False
292
        data = None
293

    
294
        with self._error_catcher():
295
            if amt is None:
296
                # cStringIO doesn't like amt=None
297
                data = self._fp.read()
298
                flush_decoder = True
299
            else:
300
                cache_content = False
301
                data = self._fp.read(amt)
302
                if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
303
                    # Close the connection when no data is returned
304
                    #
305
                    # This is redundant to what httplib/http.client _should_
306
                    # already do.  However, versions of python released before
307
                    # December 15, 2012 (http://bugs.python.org/issue16298) do
308
                    # not properly close the connection in all cases. There is
309
                    # no harm in redundantly calling close.
310
                    self._fp.close()
311
                    flush_decoder = True
312

    
313
        if data:
314
            self._fp_bytes_read += len(data)
315

    
316
            data = self._decode(data, decode_content, flush_decoder)
317

    
318
            if cache_content:
319
                self._body = data
320

    
321
        return data
322

    
323
    def stream(self, amt=2**16, decode_content=None):
324
        """
325
        A generator wrapper for the read() method. A call will block until
326
        ``amt`` bytes have been read from the connection or until the
327
        connection is closed.
328

329
        :param amt:
330
            How much of the content to read. The generator will return up to
331
            much data per iteration, but may return less. This is particularly
332
            likely when using compressed data. However, the empty string will
333
            never be returned.
334

335
        :param decode_content:
336
            If True, will attempt to decode the body based on the
337
            'content-encoding' header.
338
        """
339
        if self.chunked:
340
            for line in self.read_chunked(amt, decode_content=decode_content):
341
                yield line
342
        else:
343
            while not is_fp_closed(self._fp):
344
                data = self.read(amt=amt, decode_content=decode_content)
345

    
346
                if data:
347
                    yield data
348

    
349
    @classmethod
350
    def from_httplib(ResponseCls, r, **response_kw):
351
        """
352
        Given an :class:`httplib.HTTPResponse` instance ``r``, return a
353
        corresponding :class:`urllib3.response.HTTPResponse` object.
354

355
        Remaining parameters are passed to the HTTPResponse constructor, along
356
        with ``original_response=r``.
357
        """
358
        headers = r.msg
359

    
360
        if not isinstance(headers, HTTPHeaderDict):
361
            if PY3:  # Python 3
362
                headers = HTTPHeaderDict(headers.items())
363
            else:  # Python 2
364
                headers = HTTPHeaderDict.from_httplib(headers)
365

    
366
        # HTTPResponse objects in Python 3 don't have a .strict attribute
367
        strict = getattr(r, 'strict', 0)
368
        resp = ResponseCls(body=r,
369
                           headers=headers,
370
                           status=r.status,
371
                           version=r.version,
372
                           reason=r.reason,
373
                           strict=strict,
374
                           original_response=r,
375
                           **response_kw)
376
        return resp
377

    
378
    # Backwards-compatibility methods for httplib.HTTPResponse
379
    def getheaders(self):
380
        return self.headers
381

    
382
    def getheader(self, name, default=None):
383
        return self.headers.get(name, default)
384

    
385
    # Overrides from io.IOBase
386
    def close(self):
387
        if not self.closed:
388
            self._fp.close()
389

    
390
    @property
391
    def closed(self):
392
        if self._fp is None:
393
            return True
394
        elif hasattr(self._fp, 'closed'):
395
            return self._fp.closed
396
        elif hasattr(self._fp, 'isclosed'):  # Python 2
397
            return self._fp.isclosed()
398
        else:
399
            return True
400

    
401
    def fileno(self):
402
        if self._fp is None:
403
            raise IOError("HTTPResponse has no file to get a fileno from")
404
        elif hasattr(self._fp, "fileno"):
405
            return self._fp.fileno()
406
        else:
407
            raise IOError("The file-like object this HTTPResponse is wrapped "
408
                          "around has no file descriptor")
409

    
410
    def flush(self):
411
        if self._fp is not None and hasattr(self._fp, 'flush'):
412
            return self._fp.flush()
413

    
414
    def readable(self):
415
        # This method is required for `io` module compatibility.
416
        return True
417

    
418
    def readinto(self, b):
419
        # This method is required for `io` module compatibility.
420
        temp = self.read(len(b))
421
        if len(temp) == 0:
422
            return 0
423
        else:
424
            b[:len(temp)] = temp
425
            return len(temp)
426

    
427
    def _update_chunk_length(self):
428
        # First, we'll figure out length of a chunk and then
429
        # we'll try to read it from socket.
430
        if self.chunk_left is not None:
431
            return
432
        line = self._fp.fp.readline()
433
        line = line.split(b';', 1)[0]
434
        try:
435
            self.chunk_left = int(line, 16)
436
        except ValueError:
437
            # Invalid chunked protocol response, abort.
438
            self.close()
439
            raise httplib.IncompleteRead(line)
440

    
441
    def _handle_chunk(self, amt):
442
        returned_chunk = None
443
        if amt is None:
444
            chunk = self._fp._safe_read(self.chunk_left)
445
            returned_chunk = chunk
446
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
447
            self.chunk_left = None
448
        elif amt < self.chunk_left:
449
            value = self._fp._safe_read(amt)
450
            self.chunk_left = self.chunk_left - amt
451
            returned_chunk = value
452
        elif amt == self.chunk_left:
453
            value = self._fp._safe_read(amt)
454
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
455
            self.chunk_left = None
456
            returned_chunk = value
457
        else:  # amt > self.chunk_left
458
            returned_chunk = self._fp._safe_read(self.chunk_left)
459
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
460
            self.chunk_left = None
461
        return returned_chunk
462

    
463
    def read_chunked(self, amt=None, decode_content=None):
464
        """
465
        Similar to :meth:`HTTPResponse.read`, but with an additional
466
        parameter: ``decode_content``.
467

468
        :param decode_content:
469
            If True, will attempt to decode the body based on the
470
            'content-encoding' header.
471
        """
472
        self._init_decoder()
473
        # FIXME: Rewrite this method and make it a class with a better structured logic.
474
        if not self.chunked:
475
            raise ResponseNotChunked(
476
                "Response is not chunked. "
477
                "Header 'transfer-encoding: chunked' is missing.")
478

    
479
        # Don't bother reading the body of a HEAD request.
480
        if self._original_response and is_response_to_head(self._original_response):
481
            self._original_response.close()
482
            return
483

    
484
        with self._error_catcher():
485
            while True:
486
                self._update_chunk_length()
487
                if self.chunk_left == 0:
488
                    break
489
                chunk = self._handle_chunk(amt)
490
                decoded = self._decode(chunk, decode_content=decode_content,
491
                                       flush_decoder=False)
492
                if decoded:
493
                    yield decoded
494

    
495
            if decode_content:
496
                # On CPython and PyPy, we should never need to flush the
497
                # decoder. However, on Jython we *might* need to, so
498
                # lets defensively do it anyway.
499
                decoded = self._flush_decoder()
500
                if decoded:  # Platform-specific: Jython.
501
                    yield decoded
502

    
503
            # Chunk content ends with \r\n: discard it.
504
            while True:
505
                line = self._fp.fp.readline()
506
                if not line:
507
                    # Some sites may not end with '\r\n'.
508
                    break
509
                if line == b'\r\n':
510
                    break
511

    
512
            # We read everything; close the "file".
513
            if self._original_response:
514
                self._original_response.close()