Skip to content

bpo-42643: Add support for HTTP range requests #24228

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 37 additions & 8 deletions Lib/http/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,15 @@
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)

__version__ = "0.6"
__version__ = "0.7"

__all__ = [
"HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
"SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]

import re

import copy
import datetime
import email.utils
Expand Down Expand Up @@ -129,6 +131,10 @@

DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"


HTTP_BYTES_RANGE_HEADER = re.compile(r"bytes=(?P<first>\d+)-(?P<last>\d+)$")


class HTTPServer(socketserver.TCPServer):

allow_reuse_address = 1 # Seems to make sense in testing environment
Expand Down Expand Up @@ -657,7 +663,12 @@ def do_GET(self):
f = self.send_head()
if f:
try:
self.copyfile(f, self.wfile)
if "Range" in self.headers:
res = HTTP_BYTES_RANGE_HEADER.match(string=self.headers.get("Range"))
if res is not None:
self.copyfile(f, self.wfile, int(res.group("first")), int(res.group("last")))
else:
self.copyfile(f, self.wfile)
finally:
f.close()

Expand Down Expand Up @@ -742,10 +753,22 @@ def send_head(self):
self.end_headers()
f.close()
return None

self.send_response(HTTPStatus.OK)
if "Range" in self.headers:
res = HTTP_BYTES_RANGE_HEADER.match(string=self.headers["Range"])
if res is None:
self.send_error(code=HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE,
message="Range header is not a valid single part ranges")
self.end_headers()
f.close()
return None
self.send_response(HTTPStatus.PARTIAL_CONTENT)
self.send_header("Content-Range", f"{self.headers['Range']}/{fs[6]}")
self.send_header("Content-Length", int(res.group("last"))-int(res.group("first")))
else:
self.send_response(HTTPStatus.OK)
self.send_header("Accept-Ranges", "bytes")
self.send_header("Content-Length", str(fs[6]))
self.send_header("Content-type", ctype)
self.send_header("Content-Length", str(fs[6]))
self.send_header("Last-Modified",
self.date_time_string(fs.st_mtime))
self.end_headers()
Expand Down Expand Up @@ -842,8 +865,10 @@ def translate_path(self, path):
path += '/'
return path

def copyfile(self, source, outputfile):
"""Copy all data between two file objects.
def copyfile(self, source, outputfile, start_byte=None, end_byte=None):
"""Copy all data between two file objects if start_byte and end_byte are None.

Otherwise, copy (end_byte-start_byte) bytes data between two file objects.

The SOURCE argument is a file object open for reading
(or anything with a read() method) and the DESTINATION
Expand All @@ -856,7 +881,11 @@ def copyfile(self, source, outputfile):
to copy binary data as well.

"""
shutil.copyfileobj(source, outputfile)
if start_byte is not None and end_byte is not None:
source.seek(start_byte)
outputfile.write(source.read(end_byte))
Copy link
Contributor

@imba-tjd imba-tjd Apr 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fatal error. With a range of 2-5, start_byte is 2 and end_byte is 5, so this calls read(5).
The correct call is read(4), i.e. end_byte - start_byte + 1. I won't suggest whether the +1 belongs here or at the earlier call site.

More importantly, read() loads all of the requested data into memory at once before any of it is written. This is a serious performance issue when resuming a large file. I tried source.truncate(), but it raises io.UnsupportedOperation.

Besides, Content-Length needs to change too, so adding new tests is also recommended.

else:
shutil.copyfileobj(source, outputfile)

def guess_type(self, path):
"""Guess the type of a file.
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_httpservers.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,10 @@ def test_html_escape_filename(self):
html_text = '>%s<' % html.escape(filename, quote=False)
self.assertIn(html_text.encode(enc), body)

def test_range_requests(self):
    """Check that a single-part Range request yields a partial response.

    Requests '/test' under the base URL with ``Range: bytes=0-10`` and
    verifies the status is 206 Partial Content with body b'We are the'.
    """
    # NOTE(review): HTTP byte ranges are inclusive, so "0-10" names 11
    # bytes while the expected body below holds 10 -- confirm this
    # expectation against the intended server behaviour.
    target_url = self.base_url + '/test'
    range_header = {"Range": "bytes=0-10"}
    response = self.request(target_url, headers=range_header)
    self.check_status_and_reason(
        response=response,
        status=HTTPStatus.PARTIAL_CONTENT,
        data=b'We are the',
    )


cgi_file1 = """\
#!%s
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SimpleHTTPRequestHandler now supports single-part HTTP range requests.