#!/usr/bin/python
# Retrieve a list of files from one server. Saves modification time and supports
# download resumption.
#
# Copyright (C) 2013 Peter Wu
#
# Provenance: added in commit eb5736156bdb736f8197d92fc83b799586ae492e
# (Peter Wu, Wed, 26 Jun 2013 20:00:08 +0200), "ftp*.py: add FTP scripts":
#   - ftp-get.py: sequentially download a list of files with one connection.
#   - ftp-list.py: recursively show directory contents.
#   - ftp.py: supports "MLSD" which is missing in inetutils.

import sys
from ftplib import FTP
import re
import os
import os.path
from datetime import datetime, timezone

# Defaults used for every URL component that is not given on the command line.
DEFAULT_USER = "anonymous"
DEFAULT_PASSWD = "anon"
DEFAULT_PORT = 21

# [ftp://][user[:pass]@]host[:port][/path]
patt_url = re.compile(r"""
^
(?:ftp://)?
(?:
    (?P<user>.+?)
    (?:
        :(?P<passwd>.+?)
    )?
    @
)?
(?P<host>[a-zA-Z0-9.-]+)
(:(?P<port>\d+))?
(?P<path>/.*)?
$
""", re.VERBOSE)


def parse_target(url):
    """Split a server URL into (user, passwd, host, port, path).

    Missing user, password and port fall back to the anonymous defaults;
    a missing path is reported as None. Returns None when `url` does not
    look like `[ftp://][user[:pass]@]host[:port][/path]`.
    """
    m = patt_url.match(url)
    if m is None:
        return None

    user = m.group("user")
    passwd = m.group("passwd")
    port = m.group("port")
    return (
        DEFAULT_USER if user is None else user,
        DEFAULT_PASSWD if passwd is None else passwd,
        m.group("host"),
        DEFAULT_PORT if port is None else int(port),
        m.group("path"),
    )


def read_file_list(lines, outdir):
    """Turn an iterable of text lines into (remote_path, local_path) pairs.

    Line terminators are stripped and blank lines are skipped, since an
    empty name cannot refer to a file.
    """
    files = []
    for line in lines:
        file_path = line.rstrip("\r\n")
        if not file_path:
            continue
        # note: os.path.join(outdir, file_path) throws away outdir if
        # file_path is absolute. That is *not* intended here.
        files.append((file_path, outdir + "/" + file_path))
    return files


def plan_transfer(file_path, remote_size, local_size):
    """Decide how to fetch one file; returns (skip, offset, message).

    `remote_size` / `local_size` are byte counts, or None when unknown
    (server gave no size / no local file yet). `skip` is True when the
    local copy is assumed complete. `offset` is the byte position to
    resume from, or None to download from the beginning. `message` is a
    progress line to show the user, or None.
    """
    if remote_size is None or local_size is None:
        return False, None, None
    if local_size > remote_size:
        return False, None, (
            "Local size %d is larger than remote %d, restarting %s"
            % (local_size, remote_size, file_path))
    if local_size == remote_size:
        # assume fully downloaded. Maybe check mtime?
        return True, None, "Already completed: %s" % file_path
    if local_size > 0:
        # The tuple is required: "%" binds tighter than "-".
        return False, local_size, (
            "Downloading %d remaining bytes of %s"
            % (remote_size - local_size, file_path))
    return False, None, None


def parse_mdtm(reply):
    """Convert a "213 YYYYMMDDHHMMSS[.sss]" MDTM reply to a POSIX timestamp.

    The server reports UTC. Optional fractional seconds are discarded.
    """
    timeval = reply[4:].strip().split(".", 1)[0]
    stamp = datetime.strptime(timeval, "%Y%m%d%H%M%S")
    return stamp.replace(tzinfo=timezone.utc).timestamp()


def download_file(ftp, remote_file_path, local_file_path):
    """Fetch one file over the logged-in connection `ftp`.

    Resumes a partial local copy when possible, then copies the server's
    modification time onto the local file. Returns False when the file
    was skipped because it is already complete, True otherwise. Errors
    from ftplib and the filesystem propagate to the caller.
    """
    remote_size = ftp.size(remote_file_path)
    try:
        local_size = os.path.getsize(local_file_path)
    except OSError:
        local_size = None

    skip, offset, message = plan_transfer(
        remote_file_path, remote_size, local_size)
    if message is not None:
        print(message)
    if skip:
        return False

    os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
    # Append only when resuming; otherwise truncate so that stale bytes from
    # an oversized or unverifiable local copy do not precede the new data.
    # do not send a REST(art) command when starting at the beginning
    mode = "wb" if offset is None else "ab"
    with open(local_file_path, mode) as local_file:
        ftp.retrbinary("RETR " + remote_file_path, local_file.write,
                       rest=offset)

    # adjust modification times (server returns UTC)
    mtime = parse_mdtm(ftp.sendcmd("MDTM " + remote_file_path))
    os.utime(local_file_path, times=(mtime, mtime))
    return True


def main(argv=None, stdin=None):
    """Run the downloader; returns the process exit status.

    `argv` defaults to sys.argv and `stdin` to sys.stdin. The list of
    files to fetch is read from `stdin`, one path per line, and saved
    below the current working directory.
    """
    argv = sys.argv if argv is None else argv
    stdin = sys.stdin if stdin is None else stdin

    target = parse_target(argv[1]) if len(argv) >= 2 else None
    if target is None:
        print("Usage: python", argv[0],
              "[ftp://][user[:pass]@]ftp.example.com[:21][/path]",
              file=sys.stderr)
        print("A file list is read from stdin", file=sys.stderr)
        return 1

    user, passwd, host, port, path = target
    if path is not None:
        print("Warning: path component is ignored", file=sys.stderr)

    files = read_file_list(stdin, os.getcwd())
    if not files:
        print("Nothing to do", file=sys.stderr)
        return 1

    with FTP() as ftp:
        ftp.connect(host, port)
        ftp.login(user, passwd)
        print(ftp.getwelcome(), file=sys.stderr)
        # Some servers refuse SIZE while the connection is in ASCII mode.
        ftp.voidcmd("TYPE I")

        total = len(files)
        for current, (file_path, local_file_path) in enumerate(files, 1):
            print("Processing %d/%d: %s" % (current, total, file_path))
            # TODO: do something with path parameter as passed in a URL?
            download_file(ftp, file_path, local_file_path)

    return 0


if __name__ == "__main__":
    sys.exit(main())