aboutsummaryrefslogtreecommitdiff
path: root/Lib
diff options
context:
space:
mode:
authorBruce Merry <bmerry@ska.ac.za>2020-06-25 08:30:21 +0200
committerGitHub <noreply@github.com>2020-06-24 23:30:21 -0700
commit152f0b8beea12e6282d284100b600771b968927a (patch)
treebe477e3f5f3886186aff7d183d21772beb8b607b /Lib
parentbpo-40773: Fix rendering for 'retval' on the pdb page (GH-21081) (diff)
downloadcpython-152f0b8beea12e6282d284100b600771b968927a.tar.gz
cpython-152f0b8beea12e6282d284100b600771b968927a.tar.bz2
cpython-152f0b8beea12e6282d284100b600771b968927a.zip
bpo-41002: Optimize HTTPResponse.read with a given amount (GH-20943)
I've done the implementation for both non-chunked and chunked reads. I haven't benchmarked chunked reads because I don't currently have a convenient way to generate a high-bandwidth chunked stream, but I don't see any reason that it shouldn't enjoy the same benefits that the non-chunked case does. I've used the benchmark attached to the bpo bug to verify that performance now matches the unsized read case. Automerge-Triggered-By: @methane
Diffstat (limited to 'Lib')
-rw-r--r--Lib/http/client.py35
-rw-r--r--Lib/test/test_httplib.py27
2 files changed, 52 insertions, 10 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 019380a7203..500230d5d51 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -448,18 +448,25 @@ class HTTPResponse(io.BufferedIOBase):
self._close_conn()
return b""
+ if self.chunked:
+ return self._read_chunked(amt)
+
if amt is not None:
- # Amount is given, implement using readinto
- b = bytearray(amt)
- n = self.readinto(b)
- return memoryview(b)[:n].tobytes()
+ if self.length is not None and amt > self.length:
+ # clip the read to the "end of response"
+ amt = self.length
+ s = self.fp.read(amt)
+ if not s and amt:
+ # Ideally, we would raise IncompleteRead if the content-length
+ # wasn't satisfied, but it might break compatibility.
+ self._close_conn()
+ elif self.length is not None:
+ self.length -= len(s)
+ if not self.length:
+ self._close_conn()
+ return s
else:
# Amount is not given (unbounded read) so we must check self.length
- # and self.chunked
-
- if self.chunked:
- return self._readall_chunked()
-
if self.length is None:
s = self.fp.read()
else:
@@ -560,7 +567,7 @@ class HTTPResponse(io.BufferedIOBase):
self.chunk_left = chunk_left
return chunk_left
- def _readall_chunked(self):
+ def _read_chunked(self, amt=None):
assert self.chunked != _UNKNOWN
value = []
try:
@@ -568,7 +575,15 @@ class HTTPResponse(io.BufferedIOBase):
chunk_left = self._get_chunk_left()
if chunk_left is None:
break
+
+ if amt is not None and amt <= chunk_left:
+ value.append(self._safe_read(amt))
+ self.chunk_left = chunk_left - amt
+ break
+
value.append(self._safe_read(chunk_left))
+ if amt is not None:
+ amt -= chunk_left
self.chunk_left = 0
return b''.join(value)
except IncompleteRead:
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index e95487bcd45..e909980d23a 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -569,6 +569,33 @@ class BasicTest(TestCase):
resp.close()
self.assertTrue(resp.closed)
+ def test_partial_reads_past_end(self):
+ # if we have Content-Length, clip reads to the end
+ body = "HTTP/1.1 200 Ok\r\nContent-Length: 4\r\n\r\nText"
+ sock = FakeSocket(body)
+ resp = client.HTTPResponse(sock)
+ resp.begin()
+ self.assertEqual(resp.read(10), b'Text')
+ self.assertTrue(resp.isclosed())
+ self.assertFalse(resp.closed)
+ resp.close()
+ self.assertTrue(resp.closed)
+
+ def test_partial_readintos_past_end(self):
+ # if we have Content-Length, clip readintos to the end
+ body = "HTTP/1.1 200 Ok\r\nContent-Length: 4\r\n\r\nText"
+ sock = FakeSocket(body)
+ resp = client.HTTPResponse(sock)
+ resp.begin()
+ b = bytearray(10)
+ n = resp.readinto(b)
+ self.assertEqual(n, 4)
+ self.assertEqual(bytes(b)[:4], b'Text')
+ self.assertTrue(resp.isclosed())
+ self.assertFalse(resp.closed)
+ resp.close()
+ self.assertTrue(resp.closed)
+
def test_partial_reads_no_content_length(self):
# when no length is present, the socket should be gracefully closed when
# all data was read