summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2018-05-08 22:45:04 +0200
committerPeter Wu <peter@lekensteyn.nl>2018-05-08 23:05:00 +0200
commitdedb9339ef45810daff2b068ff8d434927547c27 (patch)
treec39fe36186c480fa02e4498518fbce2d94f52568
parentda8a76f3c7a537fb1b155f1ba7d19591845b28bf (diff)
downloadscripts-dedb9339ef45810daff2b068ff8d434927547c27.tar.gz
arch-proxy.py: support mirrors and Arch Linux Archive (ALA)
For testing older packages while reusing a local package cache. Fix miscellaneous issues such as printing stack traces when running -Syu while the cache is up to date (the peer would close the connection).
-rwxr-xr-xarch-proxy.py117
1 files changed, 98 insertions, 19 deletions
diff --git a/arch-proxy.py b/arch-proxy.py
index 56f231f..9da960c 100755
--- a/arch-proxy.py
+++ b/arch-proxy.py
@@ -27,7 +27,7 @@ class BadRequest(Exception):
pass
class RequestHandler(http.server.BaseHTTPRequestHandler):
- def send_ok(self, size, headers={}, cached=False, range_offset=None):
+ def send_ok(self, size, headers={}, upstream=None, range_offset=None):
if range_offset is None:
code = 200
else:
@@ -39,7 +39,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
headers["Content-Range"] = content_range
size -= range_offset
self.log_message('"%s" %d %s %s', self.requestline, code, size,
- "HIT" if cached else "MISS")
+ "HIT" if upstream is None else "MISS:%s" % (upstream,))
self.send_response_only(code)
self.send_header('Content-Length', size)
for k, v in headers.items():
@@ -47,14 +47,33 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
self.end_headers()
def request_data(self, head_only=False, mtime_out=None, range_offset=None):
+ """
+ Retrieves the full response body. The given "range_offset" serves only
+ as hint for the response to the client, it is not used with the upstream
+ request.
+ """
method = "HEAD" if head_only else "GET"
- url = self.get_upstream_url()
- with closing(requests.request(method, url, stream=not head_only)) as r:
- if r.status_code != 200:
- self.log_request(r.status_code)
- self.send_response_only(r.status_code)
- self.end_headers()
- return
+ streamable = not head_only
+ status_code = None
+ urls = list(self.get_upstream_urls())
+ # Try each upstream. If one fails, log it and try another. On success,
+ # return the response data. If all upstreams fail, fail the request.
+ for i, url in enumerate(urls):
+ with closing(requests.request(method, url, stream=streamable)) as r:
+ status_code = r.status_code
+ if status_code == 200:
+ yield from self.process_upstream_response(r, head_only,
+ mtime_out, i, range_offset)
+ return
+ self.log_message('"%s" %d - SKIP:%d', self.requestline,
+ status_code, i)
+ self.log_request(status_code)
+ self.send_response_only(status_code)
+ self.end_headers()
+
+ def process_upstream_response(self, r, head_only, mtime_out, upstream,
+ range_offset):
+ if r:
response_headers = {}
if 'Last-Modified' in r.headers:
try:
@@ -65,7 +84,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
except ValueError:
self.log_error("Unable to parse Last-Modified header")
self.send_ok(int(r.headers['Content-Length']), response_headers,
- range_offset=range_offset)
+ upstream=upstream, range_offset=range_offset)
if not head_only:
yield from r.iter_content(4096)
@@ -142,7 +161,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
stat_info = os.stat(path)
response_headers = {'Last-Modified':
epoch_to_text(stat_info.st_mtime)}
- self.send_ok(stat_info.st_size, response_headers, cached=True,
+ self.send_ok(stat_info.st_size, response_headers,
range_offset=range_offset)
if not head_only:
with open(path, 'rb') as f:
@@ -162,6 +181,9 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
with self.open_write_cache(path) as cache_file:
cache_ok = cache_file is not None
if cache_ok:
+ # Overwrite the temporary cache file from beginning to end,
+ # but do not include the first "range_offset"
+ # bytes in the response.
skip = range_offset
for chunk in remote_data:
cache_file.write(chunk)
@@ -181,6 +203,8 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
if data:
for chunk in data:
self.wfile.write(chunk)
+ except (BrokenPipeError, ConnectionResetError):
+ self.log_error("GET %s - (connection aborted)", self.path)
except BadRequest as e:
self.log_error("GET %s - Bad Request: %s", self.path, e)
self.send_response(400)
@@ -192,17 +216,24 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
def do_HEAD(self):
try:
list(self.request_data_with_cache(True))
+ except (BrokenPipeError, ConnectionResetError):
+ self.log_error("HEAD %s - (connection aborted)", self.path)
except BadRequest as e:
- self.log_error("GET %s - Bad Request: %s", self.path, e)
+ self.log_error("HEAD %s - Bad Request: %s", self.path, e)
self.send_response(400)
except Exception as e:
self.log_error("HEAD %s failed: %s", self.path, e)
import traceback; traceback.print_exc()
self.send_response(502)
- def get_upstream_url(self):
- prefix = "http://mirror.nl.leaseweb.net/archlinux/"
- return prefix + self.path
+ def get_upstream_urls(self):
+ # If an old version is requested, retrieve the databases from the
+ # archive mirror and do not fallback.
+ if self.server.archive_url and self.is_date_sensitive_request():
+ yield self.server.archive_url + self.path
+ return
+ for prefix in self.server.mirrors:
+ yield prefix + self.path
def get_local_path(self):
filename = os.path.basename(self.path)
@@ -212,6 +243,14 @@ class RequestHandler(http.server.BaseHTTPRequestHandler):
"""Whether the requested file should be cached."""
return self.path.endswith(".pkg.tar.xz")
+ def is_date_sensitive_request(self):
+ """Whether the resource is ephemeral."""
+ path = self.path
+ if path.endswith(".sig"):
+ path = path[:-4]
+ suffixes = [".db", ".files", ".abs.tar.gz"]
+ return any(path.endswith(suffix) for suffix in suffixes)
+
class SomeServer(http.server.HTTPServer):
def __init__(self, addr, handler, args):
self.allow_reuse_address = True
@@ -220,14 +259,54 @@ class SomeServer(http.server.HTTPServer):
super().__init__(addr, handler)
self.cachedir = args.cachedir
self.is_readonly = args.readonly
+ self.mirrors = args.mirrors
+ if not args.date:
+ self.archive_url = None
+ else:
+ archive_mirror = "https://archive.archlinux.org/repos/"
+ self.archive_url = archive_mirror + args.date + "/"
+ self.mirrors.append(self.archive_url)
+
+ def dump_config(self):
+ yesno = lambda x: "yes" if x else "no"
+ print("Listen address: %s:%s" % self.socket.getsockname()[:2])
+ print("Cache directory: %s" % self.cachedir)
+ print("Read-only cache: %s" % yesno(self.is_readonly))
+ print("Using archive: %s" % yesno(self.archive_url))
+ print("Mirrors:")
+ for mirror in self.mirrors:
+ print(" %s" % mirror)
+
+def mirror_url(string):
+ scheme = string.split(":", 1)[0]
+ if scheme not in ("http", "https"):
+ raise argparse.ArgumentTypeError("%s is not a valid URL" % string)
+ return string.rstrip("/") + "/"
+
+def parse_date(string):
+ m = re.match(r'^(\d{4})([/-]?)(\d{2})\2(\d{2})$', string)
+ if not m:
+ raise argparse.ArgumentTypeError("%s is not a valid date" % string)
+ y, _, m, d = m.groups()
+ return "%s/%s/%s" % (y, m, d)
-parser = argparse.ArgumentParser()
-parser.add_argument("--readonly", action="store_true")
-parser.add_argument("--cachedir", default=os.getcwd())
-parser.add_argument("--port", type=int, default=8001)
+parser = argparse.ArgumentParser(
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("--readonly", action="store_true",
+ help="Do not write downloaded results to the cache directory")
+parser.add_argument("--cachedir", default=os.getcwd(),
+ help="Cache directory")
+parser.add_argument("--port", type=int, default=8001,
+ help="Listen port")
+parser.add_argument("--date", type=parse_date,
+ help="Provide a repository snapshot from 'yyyy/mm/dd'")
+parser.add_argument("--mirror", dest="mirrors", metavar='URL', nargs="+",
+ type=mirror_url, default=["https://mirror.nl.leaseweb.net/archlinux/"],
+ help="Mirror list")
if __name__ == '__main__':
args = parser.parse_args()
addr = ('', args.port)
server = SomeServer(addr, RequestHandler, args)
+ server.dump_config()
server.serve_forever()