summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2017-03-04 16:35:43 +0100
committerPeter Wu <peter@lekensteyn.nl>2017-03-04 16:35:43 +0100
commitda8a76f3c7a537fb1b155f1ba7d19591845b28bf (patch)
tree72fcdce501ef5ea57ddced08abc7e0e72a4d365d
parente476230f0ee46e496da0e23f8352a464530fd6ad (diff)
downloadscripts-da8a76f3c7a537fb1b155f1ba7d19591845b28bf.tar.gz
arch-proxy.py: support resumed downloads
Quick hack to support one specific range request type: resume downloading at some offset until the end. The full file is still downloaded from upstream as needed.
-rwxr-xr-xarch-proxy.py84
1 file changed, 72 insertions(+), 12 deletions(-)
diff --git a/arch-proxy.py b/arch-proxy.py
index d40cb4c..56f231f 100755
--- a/arch-proxy.py
+++ b/arch-proxy.py
@@ -10,6 +10,7 @@
import argparse
import http.server
import os
+import re
import socket
from datetime import datetime
import requests
@@ -22,17 +23,30 @@ def text_to_epoch(text):
def epoch_to_text(epoch):
return datetime.fromtimestamp(epoch).strftime(DATE_FORMAT)
+class BadRequest(Exception):
+ pass
+
class RequestHandler(http.server.BaseHTTPRequestHandler):
- def send_ok(self, size, headers={}, cached=False):
- self.log_message('"%s" %d %s %s', self.requestline, 200, size,
+ def send_ok(self, size, headers={}, cached=False, range_offset=None):
+ if range_offset is None:
+ code = 200
+ else:
+ if range_offset >= size:
+                # TODO: should answer 416 (Requested Range Not Satisfiable);
+                # BadRequest is currently reported as a generic 400 by do_GET/do_HEAD
+ raise BadRequest("416 Requested Range Not Satisfiable")
+ code = 206
+ content_range = "bytes %d-%d/%d" % (range_offset, size - 1, size)
+ headers["Content-Range"] = content_range
+ size -= range_offset
+ self.log_message('"%s" %d %s %s', self.requestline, code, size,
"HIT" if cached else "MISS")
- self.send_response_only(200)
+ self.send_response_only(code)
self.send_header('Content-Length', size)
for k, v in headers.items():
self.send_header(k, v)
self.end_headers()
- def request_data(self, head_only=False, mtime_out=None):
+ def request_data(self, head_only=False, mtime_out=None, range_offset=None):
method = "HEAD" if head_only else "GET"
url = self.get_upstream_url()
with closing(requests.request(method, url, stream=not head_only)) as r:
@@ -50,7 +64,8 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
mtime_out[0] = mtime
except ValueError:
self.log_error("Unable to parse Last-Modified header")
- self.send_ok(int(r.headers['Content-Length']), response_headers)
+ self.send_ok(int(r.headers['Content-Length']), response_headers,
+ range_offset=range_offset)
if not head_only:
yield from r.iter_content(4096)
@@ -83,27 +98,63 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
except OSError as e:
self.log_error("Failed to remove %s", temp_path)
+ def parse_range(self):
+ value = self.headers.get('Range')
+ if value is not None:
+ # Only support "continue" range requests, resuming previous
+ # download. Anything more complex is not needed at the moment.
+ m = re.match(r'bytes=(?P<from>\d+)-$', value)
+ if not m:
+ raise BadRequest("Unsupported range request: %s" % value)
+ return int(m.group("from"))
+
+ @staticmethod
+ def skip_range_chunk(chunk, skip_bytes):
+ if skip_bytes:
+ chunksize = len(chunk)
+ if chunksize > skip_bytes:
+ chunk = chunk[skip_bytes:]
+ skip_bytes = None
+ else:
+ chunk = b''
+ skip_bytes -= chunksize
+ return chunk, (skip_bytes or None)
+
+ @classmethod
+ def skip_range(cls, it, skip_bytes):
+ for chunk in it:
+ chunk, skip_bytes = cls.skip_range_chunk(chunk, skip_bytes)
+ if chunk:
+ yield chunk
+
+
def request_data_with_cache(self, head_only=False):
+ range_offset = self.parse_range()
if not self.is_cacheable():
# Not cacheable, directly obtain data and bypass cache
- yield from self.request_data()
+ remote_data = self.request_data(range_offset=range_offset)
+ yield from self.skip_range(remote_data, range_offset)
return
path = self.get_local_path()
try:
# Try to open cached file and yield data from it
- self.send_ok(os.path.getsize(path),
- {'Last-Modified': epoch_to_text(os.stat(path).st_mtime)},
- cached=True)
+ stat_info = os.stat(path)
+ response_headers = {'Last-Modified':
+ epoch_to_text(stat_info.st_mtime)}
+ self.send_ok(stat_info.st_size, response_headers, cached=True,
+ range_offset=range_offset)
if not head_only:
with open(path, 'rb') as f:
+ if range_offset:
+ f.seek(range_offset)
yield from f
except FileNotFoundError:
# File does not exist, so try to pipe upstream
# (optionally writing to cache file)
mtime_pointer = [None]
remote_data = self.request_data(head_only=head_only,
- mtime_out=mtime_pointer)
+ mtime_out=mtime_pointer, range_offset=range_offset)
if head_only:
list(remote_data) # consume yield and StopIteration
if not head_only and remote_data:
@@ -111,15 +162,18 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
with self.open_write_cache(path) as cache_file:
cache_ok = cache_file is not None
if cache_ok:
+ skip = range_offset
for chunk in remote_data:
cache_file.write(chunk)
- yield chunk
+ chunk, skip = self.skip_range_chunk(chunk, skip)
+ if chunk:
+ yield chunk
if cache_ok:
# Write was successful, now fix mtime and rename
self.finish_cache(mtime_pointer[0])
else:
# Cache file unavailable, just pass all data
- yield from remote_data
+ yield from self.skip_range(remote_data, range_offset)
def do_GET(self):
try:
@@ -127,6 +181,9 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
if data:
for chunk in data:
self.wfile.write(chunk)
+ except BadRequest as e:
+ self.log_error("GET %s - Bad Request: %s", self.path, e)
+ self.send_response(400)
except Exception as e:
self.log_error("GET %s failed: %s", self.path, e)
import traceback; traceback.print_exc()
@@ -135,6 +192,9 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
def do_HEAD(self):
try:
list(self.request_data_with_cache(True))
+ except BadRequest as e:
+        self.log_error("HEAD %s - Bad Request: %s", self.path, e)
+ self.send_response(400)
except Exception as e:
self.log_error("HEAD %s failed: %s", self.path, e)
import traceback; traceback.print_exc()