#!/usr/bin/python # Retrieve a list of files from one server. Saves modification time and supports # download resumption. # # Copyright (C) 2013 Peter Wu from __future__ import print_function, division import sys from ftplib import FTP import re import os, os.path from datetime import datetime, timezone user = "anonymous" passwd = "anon" host = None port = 21 path = "/" outdir = os.getcwd() patt_url = re.compile(r""" ^ (?:ftp://)? (?: (?P.+?) (?: :(?P.+?) )? @ )? (?P[a-zA-Z0-9.-]+) (:(?P\d+))? (?P/.*)? $ """, re.VERBOSE) if len(sys.argv) >= 2: m = patt_url.match(sys.argv[1]) if m: if m.group("user") is not None: user = m.group("user") if m.group("passwd") is not None: passwd = m.group("passwd") host = m.group("host") if m.group("port") is not None: port = int(m.group("port")) if m.group("path") is not None: path = m.group("path") if not path.endswith("/"): print("Path must be a directory", file=sys.stderr) sys.exit(1) if host is None: print("Usage: python", sys.argv[0], "[ftp://][user[:pass]@]ftp.example.com[:21][/path]", file=sys.stderr) print("A file list is read from stdin", file=sys.stderr) sys.exit(1) files = [] for line in sys.stdin: file_path = line.rstrip("\r\n").lstrip("/") # note: os.path.join(outdir, file_path) throws away if file_path # is absolute. That is *not* intended here. local_file_path = outdir + "/" + file_path files.append((path + file_path, local_file_path)) if not files: print("Nothing to do", file=sys.stderr) sys.exit(1) with FTP() as ftp: ftp.connect(host, port) ftp.login(user, passwd) print(ftp.getwelcome(), file=sys.stderr) current = 0 total = len(files) for file_path, local_file_path in files: current = current + 1 print("Processing %d/%d: %s" % (current, total, file_path)) # TODO: do something with path parameter as passed in a URL? remote_file_path = file_path # do not send a REST(art) command when starting at the beginning offset = None file_size = ftp.size(remote_file_path) if file_size is not None: try: local_size = os.path.getsize(local_file_path) if local_size > file_size: print("Local size %d is larger than remote %d, restarting %s" % (local_size, file_size, file_path)) elif local_size == file_size: # assume fully downloaded. Maybe check mtime? print("Already completed:", file_path) continue elif local_size > 0: offset = local_size print("Downloading %d remaining bytes of %s" % file_size - offset, file_path) except OSError: pass os.makedirs(os.path.dirname(local_file_path), exist_ok=True) with open(local_file_path, "ab") as local_file: ftp.retrbinary("RETR " + remote_file_path, local_file.write, rest=offset) # adjust modification times (server returns UTC) timeval = ftp.sendcmd("MDTM " + remote_file_path)[4:] mtime = datetime.strptime(timeval, "%Y%m%d%H%M%S").replace(tzinfo=timezone.utc).timestamp() os.utime(local_file_path, times=(mtime, mtime))