#!/usr/bin/python
# Retrieve a list of files from one server. Saves modification time and supports
# download resumption.
#
# Usage: ftp-get.py [ftp://][user[:pass]@]host[:port][/path] < file-list
# The list of remote file paths is read from stdin, one path per line. Each
# file is stored below the current working directory under the same path.
#
# Copyright (C) 2013 Peter Wu

import sys
from ftplib import FTP
import re
import os
import os.path
from datetime import datetime, timezone

# Connection defaults, overridden by the URL given as first argument.
user = "anonymous"
passwd = "anon"
host = None
port = 21
path = "/"

outdir = os.getcwd()

# [ftp://][user[:pass]@]host[:port][/path]
patt_url = re.compile(r"""
^
(?:ftp://)?
(?:
    (?P<user>.+?)
    (?:
        :(?P<passwd>.+?)
    )?
    @
)?
(?P<host>[a-zA-Z0-9.-]+)
(:(?P<port>\d+))?
(?P<path>/.*)?
$
""", re.VERBOSE)

if len(sys.argv) >= 2:
    m = patt_url.match(sys.argv[1])
    if m:
        if m.group("user") is not None:
            user = m.group("user")
        if m.group("passwd") is not None:
            passwd = m.group("passwd")
        host = m.group("host")
        if m.group("port") is not None:
            port = int(m.group("port"))
        if m.group("path") is not None:
            path = m.group("path")
            print("Warning: path component is ignored", file=sys.stderr)

if host is None:
    print("Usage: python", sys.argv[0],
          "[ftp://][user[:pass]@]ftp.example.com[:21][/path]",
          file=sys.stderr)
    print("A file list is read from stdin", file=sys.stderr)
    sys.exit(1)

# (remote path, local path) pairs to retrieve
files = []
for line in sys.stdin:
    file_path = line.rstrip("\r\n")
    if not file_path:
        # a blank line names no file; skip it instead of requesting ""
        continue
    # note: os.path.join(outdir, file_path) throws away outdir if file_path
    # is absolute. That is *not* intended here.
    local_file_path = outdir + "/" + file_path
    files.append((file_path, local_file_path))

if not files:
    print("Nothing to do", file=sys.stderr)
    sys.exit(1)

with FTP() as ftp:
    ftp.connect(host, port)
    ftp.login(user, passwd)
    print(ftp.getwelcome(), file=sys.stderr)

    total = len(files)

    for current, (file_path, local_file_path) in enumerate(files, 1):
        print("Processing %d/%d: %s" % (current, total, file_path))

        # TODO: do something with path parameter as passed in a URL?
        remote_file_path = file_path

        # do not send a REST(art) command when starting at the beginning
        offset = None
        file_size = ftp.size(remote_file_path)
        if file_size is not None:
            try:
                local_size = os.path.getsize(local_file_path)
            except OSError:
                # no usable local copy: download from the start
                pass
            else:
                if local_size > file_size:
                    print("Local size %d is larger than remote %d, restarting %s"
                          % (local_size, file_size, file_path))
                elif local_size == file_size:
                    # assume fully downloaded. Maybe check mtime?
                    print("Already completed:", file_path)
                    continue
                elif local_size > 0:
                    offset = local_size
                    # the arguments must be a tuple; "fmt % a - b, c" applies
                    # % to a alone and raises TypeError
                    print("Downloading %d remaining bytes of %s"
                          % (file_size - offset, file_path))

        os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
        # Append only when resuming; a restart must truncate the stale file,
        # otherwise the new data would end up behind the old contents.
        write_mode = "ab" if offset else "wb"
        with open(local_file_path, write_mode) as local_file:
            ftp.retrbinary("RETR " + remote_file_path, local_file.write,
                           rest=offset)

        # adjust modification times (server returns UTC)
        # [4:] drops the "213 " reply code; [:14] drops optional fractional
        # seconds which strptime's format below cannot parse.
        timeval = ftp.sendcmd("MDTM " + remote_file_path)[4:].strip()[:14]
        mtime = datetime.strptime(timeval, "%Y%m%d%H%M%S") \
            .replace(tzinfo=timezone.utc).timestamp()
        os.utime(local_file_path, times=(mtime, mtime))
def get_ls_processor(cwd):
    """Build the per-line callback used while listing directory `cwd`.

    The returned function is meant to be passed to `ftp.retrlines('LIST', ...)`.
    It prints every listing line to stdout. For lines starting with 'd'
    (directories) it parses the entry name with the module-level `patt_list`
    pattern and appends the resulting path (`cwd` + "/" + name) to the
    module-level `dirs` queue so it gets listed later.

    Lines that `patt_list` cannot parse are reported on stderr and skipped
    instead of raising, and the "." / ".." entries are never queued because
    they would make the traversal revisit the same directories forever.
    """
    def process_dir(line):
        print(line)
        # startswith() is safe on an empty line, unlike line[0]
        if not line.startswith("d"):
            return

        m = patt_list.match(line)
        if m is None:
            print("Cannot parse listing line:", line, file=sys.stderr)
            return

        name = m.group(1)
        if name in (".", ".."):
            return

        # avoid a double slash when the parent is the root directory
        if cwd == "/":
            subdir = "/" + name
        else:
            subdir = cwd + "/" + name
        print("Queuing", subdir, file=sys.stderr)
        dirs.append(subdir)
    return process_dir
def format_perms(mode):
    """Render the permission bits of a numeric UNIX mode as nine characters.

    The result has three "rwx" triplets (user, group, world). When the
    setuid, setgid or sticky bit is set, the execute position of the
    matching triplet shows "s"/"t" if the execute bit is also set and
    "S"/"T" if it is not.
    """
    chars = []
    # special bit that belongs to each triplet: setuid, setgid, sticky
    for idx, special_bit in enumerate((0o4000, 0o2000, 0o1000)):
        exec_bit = 0o100 >> (3 * idx)
        chars.append("r" if mode & (exec_bit << 2) else "-")
        chars.append("w" if mode & (exec_bit << 1) else "-")

        executable = bool(mode & exec_bit)
        if not mode & special_bit:
            chars.append("x" if executable else "-")
        elif exec_bit == 0o001:  # "world" triplet carries the sticky bit
            chars.append("t" if executable else "T")
        else:
            chars.append("s" if executable else "S")
    return "".join(chars)
def format_bytes(bytes):
    """Format a byte count (or byte rate) as a short human-readable string.

    Returns "--.-K" for 0 and "<n>B" for values below 1024. Larger values
    are scaled by powers of 1024 to KB, MB, GB or TB and printed with about
    three significant digits (two decimals below 10, one below 100, none
    otherwise). Values beyond the TB range stay expressed in TB, so the
    function never fails on large input.
    """
    if bytes == 0:
        return "--.-K"
    if bytes < 1024:
        return str(bytes) + "B"

    units = ("KB", "MB", "GB", "TB")
    num = bytes / 1024
    pfx = units[0]
    # step up one unit at a time; the last unit absorbs everything larger
    for unit in units[1:]:
        if num < 1024:
            break
        num /= 1024
        pfx = unit

    if num < 10:
        fmt_str = "{:.2f}{}"
    elif num < 100:
        fmt_str = "{:.1f}{}"
    else:
        fmt_str = "{:.0f}{}"

    return fmt_str.format(num, pfx)
{eta:11}" + else: + progress_text = "\rRetrieved {bytes} bytes {rate:>6}/s {eta:11}" + + print("Downloading {} ({} bytes)".format(filename, + "unknown" if size is None else size)) + + begin_tsp = time.time() + bytes_sofar = 0 + def get_writer(local_file): + sample_bytes = 0 + sample_tsp = begin_tsp + sample_rate = 0 + def writer(data): + nonlocal bytes_sofar, sample_bytes, sample_tsp, sample_rate + + local_file.write(data) + bytes_sofar += len(data) + percent_done = 0 + + now_tsp = time.time() + timediff = abs(now_tsp - sample_tsp) + bytediff = bytes_sofar - sample_bytes + if timediff >= 1 and bytediff > 0: + sample_tsp, sample_bytes = now_tsp, bytes_sofar + sample_rate = bytediff / timediff + elif bytediff == 0 and timediff >= 2: + sample_tsp, sample_bytes = now_tsp, bytes_sofar + sample_rate = 0 + # else timediff too small or bytediff zero + + rate_str = format_bytes(int(sample_rate)) + eta_str = "eta unknown" + if size is not None and bytes_sofar + offset <= size: + percent_done = 1.0 * (bytes_sofar + offset) / size + bytes_left = size - offset - bytes_sofar + + if sample_rate > 0: + eta_sec = bytes_left / sample_rate + eta_str = "eta " + format_time(math.ceil(eta_sec)) + + print(progress_text.format( + percent = percent_done, + bytes = offset + bytes_sofar, + bar_done = "=" * int((BAR_WIDTH - 1) * percent_done) + ">", + rate = rate_str, + eta = eta_str), end="") + return writer + + write_mode = "ab" if offset else "wb" + with open(local_file_path, write_mode) as local_file: + ftp.retrbinary("RETR " + file_path, get_writer(local_file), + rest=offset if offset else None) + + duration = time.time() - begin_tsp + rate_str = format_bytes(int(bytes_sofar / duration)) + print(progress_text.format( + percent = 1.00, + bytes = offset + bytes_sofar, + bar_done = "=" * (BAR_WIDTH - 1) + ">", + rate = rate_str, + eta = "in " + format_time(math.ceil(duration)))) + + # adjust modification times (server returns UTC) + timeval = ftp.sendcmd("MDTM " + file_path)[4:] + mtime = 
def reget_file(ftp, file_path, local_file_path):
    """Download `file_path`, resuming a partial local copy when one exists.

    The remote size is queried with `ftp.size()`. If it is known and a local
    file exists:
      - local larger than remote: print a message and do nothing;
      - same size: treat the file as complete and do nothing;
      - smaller and non-empty: continue after the bytes already present.
    In every other case the whole file is fetched. The transfer itself is
    delegated to `download_file`.

    ftp             -- connected ftplib.FTP instance
    file_path       -- path of the file on the server
    local_file_path -- path of the file to create or extend locally
    """
    # 0 means "start at the beginning". download_file adds this value to its
    # byte counters, so it must be a number (None raised TypeError there) and
    # it only sends a REST(art) command for a non-zero offset.
    offset = 0
    file_size = ftp.size(file_path)
    if file_size is not None:
        try:
            local_size = os.path.getsize(local_file_path)
        except OSError:
            # no usable local copy: fetch the whole file
            pass
        else:
            if local_size > file_size:
                print("Local size %d is larger than remote %d" % (local_size, file_size))
                return
            if local_size == file_size:
                # assume fully downloaded. Maybe check mtime?
                print("Already completed:", file_path)
                return
            if local_size > 0:
                offset = local_size
                print("Downloading {0:d} remaining bytes of {1}".format(file_size - offset, file_path))

    download_file(ftp, file_path, local_file_path, size=file_size, offset=offset)
elif cmd in "get": + save_file = os.path.basename(value) + download_file(ftp, value, save_file) + elif cmd in "reget": + save_file = os.path.basename(value) + reget_file(ftp, value, save_file) + elif cmd == "lcd": + os.chdir(value) + print("Local directory is now", os.getcwd()) + elif cmd == "rhelp": + print(ftp.sendcmd("help")) + elif cmd == "help": + print("Commands are: " + " ".join(cmds.keys())) + elif cmd == "!": + if value: + subprocess.call(value, shell=True) + else: + shell = os.getenv("SHELL") + shell = shell if shell else "sh" + subprocess.call([shell]) + elif cmd in ("bye", "quit"): + try: + print(ftp.quit()) + except EOFError: + pass + break + except KeyboardInterrupt: + print("") + try: + ftp.voidcmd("PWD") + except EOFError: + print("Not connected.") + except all_errors as e: + # assume PWD is always possible and that any errors must result + # from the previous operation + print("Error:", e) + # discard good PWD response + ftp.voidresp() + except all_errors as e: + print("Error:", e) + except OSError as e: # for get, reget + print("ftp: local:", e) -- cgit v1.2.1