summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Wu <lekensteyn@gmail.com> 2013-06-26 20:00:08 +0200
committer Peter Wu <lekensteyn@gmail.com> 2013-06-26 20:00:08 +0200
commit eb5736156bdb736f8197d92fc83b799586ae492e (patch)
tree fceeab2428b1935c520c2c92bb22f03126b836a3
parent 82c07bc02cafe9b8d6cb9fa2362b2ba6326d6a7b (diff)
download scripts-eb5736156bdb736f8197d92fc83b799586ae492e.tar.gz
ftp*.py: add FTP scripts
- ftp-get.py: sequentially download a list of files with one connection.
- ftp-list.py: recursively show directory contents.
- ftp.py: supports "MLSD", which is missing in inetutils.
-rwxr-xr-x ftp-get.py 110
-rwxr-xr-x ftp-list.py 100
-rwxr-xr-x ftp.py 372
3 files changed, 582 insertions, 0 deletions
diff --git a/ftp-get.py b/ftp-get.py
new file mode 100755
index 0000000..ed47402
--- /dev/null
+++ b/ftp-get.py
@@ -0,0 +1,110 @@
+#!/usr/bin/python
+# Retrieve a list of files from one server. Saves modification time and supports
+# download resumption.
+#
+# Copyright (C) 2013 Peter Wu <lekensteyn@gmail.com>
+
+import sys
+from ftplib import FTP
+import re
+import os, os.path
+from datetime import datetime, timezone
+
# Connection defaults; each one may be overridden by the URL given on the
# command line.
user = "anonymous"
passwd = "anon"
host = None
port = 21
path = "/"

# Downloaded files are stored below the directory the script was started in.
outdir = os.getcwd()

# Matches [ftp://][user[:pass]@]host[:port][/path].
# The user name may not contain "/" or ":" and the password may not contain
# "/"; otherwise an "@" that only occurs in the path (host/pub/a@b) would be
# mistaken for the credentials separator.
patt_url = re.compile(r"""
^
(?:ftp://)?
(?:
    (?P<user>[^/:]+?)
    (?:
        :(?P<passwd>[^/]+?)
    )?
    @
)?
(?P<host>[a-zA-Z0-9.-]+)
(:(?P<port>\d+))?
(?P<path>/.*)?
$
""", re.VERBOSE)
+
# Override the connection defaults with whatever the URL argument provides.
url_match = patt_url.match(sys.argv[1]) if len(sys.argv) >= 2 else None
if url_match:
    given = url_match.groupdict()
    host = given["host"]
    if given["user"] is not None:
        user = given["user"]
    if given["passwd"] is not None:
        passwd = given["passwd"]
    if given["port"] is not None:
        port = int(given["port"])
    if given["path"] is not None:
        path = given["path"]
        print("Warning: path component is ignored", file=sys.stderr)

# Without a host (no argument, or one that is not a valid URL) there is
# nothing to connect to: explain the usage and give up.
if host is None:
    print("Usage: python", sys.argv[0],
          "[ftp://][user[:pass]@]ftp.example.com[:21][/path]",
          file=sys.stderr)
    print("A file list is read from stdin", file=sys.stderr)
    sys.exit(1)
+
# Read the remote paths to fetch from standard input, one per line. Each
# entry is a (remote path, local path) pair.
files = []
for line in sys.stdin:
    file_path = line.rstrip("\r\n")
    if not file_path:
        # A blank line (such as a trailing newline) names no file.
        continue
    # Note: os.path.join(outdir, file_path) would throw away outdir when
    # file_path is absolute. That is *not* intended here, hence the plain
    # string concatenation.
    local_file_path = outdir + "/" + file_path
    files.append((file_path, local_file_path))

if not files:
    print("Nothing to do", file=sys.stderr)
    sys.exit(1)
+
# Fetch every queued file over a single connection, resuming partial
# downloads where possible.
with FTP() as ftp:
    ftp.connect(host, port)
    ftp.login(user, passwd)
    print(ftp.getwelcome(), file=sys.stderr)

    total = len(files)
    for current, (file_path, local_file_path) in enumerate(files, start=1):
        print("Processing %d/%d: %s" % (current, total, file_path))

        # TODO: do something with path parameter as passed in a URL?
        remote_file_path = file_path

        # Byte offset to resume from. None means "start at the beginning";
        # no REST(art) command is sent in that case.
        offset = None
        file_size = ftp.size(remote_file_path)
        if file_size is not None:
            try:
                local_size = os.path.getsize(local_file_path)
            except OSError:
                # No usable local copy yet: download the whole file.
                pass
            else:
                if local_size > file_size:
                    print("Local size %d is larger than remote %d, restarting %s"
                          % (local_size, file_size, file_path))
                elif local_size == file_size:
                    # assume fully downloaded. Maybe check mtime?
                    print("Already completed:", file_path)
                    continue
                elif local_size > 0:
                    offset = local_size
                    print("Downloading %d remaining bytes of %s"
                          % (file_size - offset, file_path))

        os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
        # Append only when resuming. A fresh or restarted download must
        # truncate, otherwise the new data ends up behind the stale content.
        write_mode = "ab" if offset else "wb"
        with open(local_file_path, write_mode) as local_file:
            ftp.retrbinary("RETR " + remote_file_path, local_file.write,
                           rest=offset)

        # adjust modification times (server returns UTC)
        timeval = ftp.sendcmd("MDTM " + remote_file_path)[4:]
        # The time value may carry fractional seconds ("YYYYMMDDHHMMSS.sss");
        # only the whole seconds are used.
        timeval = timeval.strip().partition(".")[0]
        mtime = (datetime.strptime(timeval, "%Y%m%d%H%M%S")
                 .replace(tzinfo=timezone.utc).timestamp())
        os.utime(local_file_path, times=(mtime, mtime))
diff --git a/ftp-list.py b/ftp-list.py
new file mode 100755
index 0000000..1b65832
--- /dev/null
+++ b/ftp-list.py
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+# Recursively list FTP directory contents
+#
+# Copyright (C) 2013 Peter Wu <lekensteyn@gmail.com>
+
+# Prepend directory name to files and list files:
+# awk '/^Directory/{if($0 != "Directory /")dir=substr($0, index($0, "/"))}/^-/{n=index($0,$9);print substr($0,1, n - 1) dir "/" substr($0,n)}'
+
+import sys
+from ftplib import FTP
+import re
+
# Connection defaults; each one may be overridden by the URL given on the
# command line.
user = "anonymous"
passwd = "anon"
host = None
port = 21
path = "/"

# Matches [ftp://][user[:pass]@]host[:port][/path].
# The user name may not contain "/" or ":" and the password may not contain
# "/"; otherwise an "@" that only occurs in the path (host/pub/a@b) would be
# mistaken for the credentials separator.
patt_url = re.compile(r"""
^
(?:ftp://)?
(?:
    (?P<user>[^/:]+?)
    (?:
        :(?P<passwd>[^/]+?)
    )?
    @
)?
(?P<host>[a-zA-Z0-9.-]+)
(:(?P<port>\d+))?
(?P<path>/.*)?
$
""", re.VERBOSE)
+
# Override the connection defaults with whatever the URL argument provides.
url_match = patt_url.match(sys.argv[1]) if len(sys.argv) >= 2 else None
if url_match:
    given = url_match.groupdict()
    host = given["host"]
    if given["user"] is not None:
        user = given["user"]
    if given["passwd"] is not None:
        passwd = given["passwd"]
    if given["port"] is not None:
        port = int(given["port"])
    if given["path"] is not None:
        path = given["path"]

# Without a host (no argument, or one that is not a valid URL) there is
# nothing to connect to: explain the usage and give up.
if host is None:
    print("Usage: python", sys.argv[0],
          "[ftp://][user[:pass]@]ftp.example.com[:21][/path]",
          file=sys.stderr)
    sys.exit(1)

# Directories whose contents still have to be retrieved; the traversal
# starts at the path taken from the URL.
dirs = [path]
+
# Parses one line of "ls -l"-style LIST output.
# Group 1 is the entry name, group 2 the symlink target (if any).
patt_list = re.compile(r"""
^
.[-rwxXsStT]{9}\ +          # file type and permissions
\d+\ +                      # links count
\S+\ +                      # owner
\S+\ +                      # group
\d+\ +                      # size
\S{3}\ +                    # month
\d{1,2}\ +                  # day, single digits are padded with a space
(?:\d{4,}|\d{2}:\d{2})      # year or time
\ (.+?)                     # name
(?:\ ->\ (.+))?             # symlink target
[\r\n]*                     # optional line terminator
$
""", re.VERBOSE)
+
def get_ls_processor(cwd):
    """Return a line callback for a LIST of the remote directory *cwd*.

    The callback echoes every listing line to stdout. For each line that
    describes a sub-directory it appends that directory's path (built from
    *cwd* and the entry name) to the module-level ``dirs`` queue so that it
    gets listed later. Symbolic links are printed but not followed.
    """
    def process_dir(line):
        print(line)
        # startswith() also copes with empty lines, unlike line[0].
        if not line.startswith("d"):
            return
        m = patt_list.match(line)
        if m is None:
            # Listing format not understood; do not guess a name.
            print("Cannot parse directory entry:", line, file=sys.stderr)
            return
        name = m.group(1)
        if name in (".", ".."):
            # Queuing these would make the traversal loop forever.
            return
        # rstrip() handles both the root ("/") and a cwd with a trailing "/".
        subdir = cwd.rstrip("/") + "/" + name
        print("Queuing", subdir, file=sys.stderr)
        dirs.append(subdir)
    return process_dir
+
# Walk the remote tree: take a directory off the queue, list it, and let the
# LIST callback queue the sub-directories it finds.
with FTP() as ftp:
    ftp.connect(host, port)
    ftp.login(user, passwd)
    print(ftp.getwelcome(), file=sys.stderr)
    while dirs:
        # pop() takes the most recently queued directory.
        current_dir = dirs.pop()
        ftp.cwd(current_dir)
        print("Directory", ftp.pwd())
        line_handler = get_ls_processor(current_dir)
        ftp.retrlines('LIST', line_handler)
        print("Queue size:", len(dirs), file=sys.stderr)
diff --git a/ftp.py b/ftp.py
new file mode 100755
index 0000000..b562d20
--- /dev/null
+++ b/ftp.py
@@ -0,0 +1,372 @@
+#!/usr/bin/python
+# A small FTP shell
+#
+# Copyright (C) 2013 Peter Wu <lekensteyn@gmail.com>
+
+import sys
+from ftplib import FTP, all_errors
+import re
+import os, os.path
+from datetime import datetime, timezone, date
+import readline # for enhanced input()
+import subprocess
+import time
+import math
+
# Python 2's input() evaluates what the user types (like eval(raw_input()));
# use raw_input() under that name instead whenever it exists.
try:
    raw_input
except NameError:
    # No raw_input: input() is left as it is.
    pass
else:
    input = raw_input
+
# Connection defaults; each one may be overridden by the URL given on the
# command line.
user = "anonymous"
passwd = "anon"
host = None
port = 21
path = "/"

# Directory the script was started in.
outdir = os.getcwd()

# Matches [ftp://][user[:pass]@]host[:port][/path].
# The user name may not contain "/" or ":" and the password may not contain
# "/"; otherwise an "@" that only occurs in the path (host/pub/a@b) would be
# mistaken for the credentials separator.
patt_url = re.compile(r"""
^
(?:ftp://)?
(?:
    (?P<user>[^/:]+?)
    (?:
        :(?P<passwd>[^/]+?)
    )?
    @
)?
(?P<host>[a-zA-Z0-9.-]+)
(:(?P<port>\d+))?
(?P<path>/.*)?
$
""", re.VERBOSE)
+
# Override the connection defaults with whatever the URL argument provides.
url_match = patt_url.match(sys.argv[1]) if len(sys.argv) >= 2 else None
if url_match:
    given = url_match.groupdict()
    host = given["host"]
    if given["user"] is not None:
        user = given["user"]
    if given["passwd"] is not None:
        passwd = given["passwd"]
    if given["port"] is not None:
        port = int(given["port"])
    if given["path"] is not None:
        path = given["path"]
        print("Warning: path component is ignored", file=sys.stderr)

# Without a host (no argument, or one that is not a valid URL) there is
# nothing to connect to: explain the usage and give up.
if host is None:
    print("Usage: python", sys.argv[0],
          "[ftp://][user[:pass]@]ftp.example.com[:21][/path]",
          file=sys.stderr)
    sys.exit(1)
+
def format_perms(mode):
    """Render the permission bits of a numeric UNIX mode as nine characters.

    The result has the form used by ``ls -l`` (without the leading file type
    character), e.g. ``rwxr-xr-x``. The setuid, setgid and sticky bits
    replace the corresponding execute flag with ``s``/``t`` (lower case when
    the execute bit is set as well, upper case otherwise).
    """
    chars = []
    # One round each for user, group and others.
    for shift in range(3):
        exec_bit = 0o100 >> (3 * shift)
        # setuid, setgid or sticky bit belonging to this triplet
        special_bit = 0o4000 >> shift
        executable = bool(mode & exec_bit)

        chars.append("r" if mode & (exec_bit << 2) else "-")
        chars.append("w" if mode & (exec_bit << 1) else "-")
        if not (mode & special_bit):
            chars.append("x" if executable else "-")
        elif exec_bit == 0o001:
            # "others" triplet: the special bit is the sticky bit
            chars.append("t" if executable else "T")
        else:
            chars.append("s" if executable else "S")
    return "".join(chars)
+
def format_type_fact(type):
    """Map the value of an MLSD "type" fact to an ``ls``-style type character.

    Returns "-" for files, "d" for the directory types ("cdir", "pdir" and
    "dir") and "?" for anything else.
    """
    if type in ("cdir", "pdir", "dir"):
        return "d"
    # TODO: handle OS.name=type
    return "-" if type == "file" else "?"
+
def dt_from_ftp(timeval):
    """Convert an FTP time value into a timezone-aware UTC datetime.

    *timeval* has the form ``YYYYMMDDHHMMSS`` with an optional fractional
    part (``YYYYMMDDHHMMSS.sss``). Surrounding whitespace is ignored and the
    fraction is kept up to microsecond precision.

    Raises ValueError if *timeval* does not have that form.
    """
    whole, _, fraction = timeval.strip().partition(".")
    parsed = datetime.strptime(whole, "%Y%m%d%H%M%S")
    if fraction:
        # Pad or cut the fraction to exactly six digits (microseconds).
        parsed = parsed.replace(microsecond=int(fraction[:6].ljust(6, "0")))
    return parsed.replace(tzinfo=timezone.utc)
+
def format_mlsd(name, facts):
    """Format one MLSD entry as an ``ls -l``-like line.

    *name* is the entry name and *facts* a dict of MLSD facts. The facts
    "type", "unix.mode", "unix.owner", "unix.group", "size" and "modify" are
    used when present; a missing fact is shown as "?" (or as an empty column
    for the size).
    """
    if "type" in facts:
        mode_desc = format_type_fact(facts["type"])
    else:
        mode_desc = "?"

    if "unix.mode" in facts:
        # unix.mode is an octal string such as "0755"
        mode_desc += format_perms(int(facts["unix.mode"], 8))
    else:
        mode_desc += "?" * 9

    user = facts["unix.owner"] if "unix.owner" in facts else "?"
    group = facts["unix.group"] if "unix.group" in facts else "?"
    size = int(facts["size"]) if "size" in facts else ""

    if "modify" in facts:
        modtime = dt_from_ftp(facts["modify"])
        # Like ls: show the time for entries from the current year and the
        # year for everything else.
        if date.today().year == modtime.year:
            date_str = modtime.strftime("%b %d %H:%M")
        else:
            date_str = modtime.strftime("%b %d %Y")
    else:
        # No modification time known; keep the column width.
        date_str = "?".ljust(12)

    line = mode_desc + " "
    line += " {user:8s} {group:8s} {size:8}".format(
        user=user, group=group, size=size)
    line += " " + date_str + " " + name
    return line
+
def format_bytes(bytes):
    """Format a byte count (or rate) as a short human-readable string.

    Zero is shown as the placeholder "--.-K" and values below 1024 as plain
    bytes ("512B"). Larger values are scaled by powers of 1024 to KB, MB, GB,
    TB or PB and shown with about three significant digits ("1.50KB",
    "15.0MB", "200GB"). Values beyond the PB range stay in PB.
    """
    if bytes == 0:
        return "--.-K"
    if bytes < 1024:
        return str(bytes) + "B"

    units = ("KB", "MB", "GB", "TB", "PB")
    num = bytes / 1024
    index = 0
    # Keep dividing until the number fits the unit or the units run out.
    while num >= 1024 and index < len(units) - 1:
        num /= 1024
        index += 1
    pfx = units[index]

    if num < 10:
        fmt_str = "{:.2f}{}"
    elif num < 100:
        fmt_str = "{:.1f}{}"
    else:
        fmt_str = "{:.0f}{}"

    return fmt_str.format(num, pfx)
+
def format_time(seconds):
    """Format a duration in seconds using its two most significant units.

    The value is truncated to whole seconds first. Depending on its size the
    result looks like "42s", "3m 20s", "5h 7m", "12d 3h" or "150d".
    """
    MINUTE, HOUR, DAY = 60, 3600, 24 * 3600
    total = int(seconds)

    if total < 100:
        return "{0}s".format(total)
    if total < 100 * MINUTE:
        minutes, secs = divmod(total, MINUTE)
        return "{0}m {1}s".format(minutes, secs)
    if total < 48 * HOUR:
        hours, minutes = divmod(total // MINUTE, 60)
        return "{0}h {1}m".format(hours, minutes)
    if total < 100 * DAY:
        days, hours = divmod(total // HOUR, 24)
        return "{0}d {1}h".format(days, hours)
    return "{0}d".format(total // DAY)
+
def download_file(ftp, file_path, local_file_path, size=None, offset=0):
    """Download *file_path* to *local_file_path*, showing a progress line.

    Args:
        ftp: a connected and logged-in ftplib.FTP instance.
        file_path: path of the remote file.
        local_file_path: where to store the file; missing parent directories
            are created.
        size: size of the remote file in bytes; queried from the server when
            None.
        offset: number of bytes already present locally. When non-zero the
            transfer is resumed at that position and the data is appended to
            the local file. None is treated like 0.

    After the transfer the modification time of the local file is set to the
    one the server reports via MDTM.
    """
    # Allow None for "start at the beginning"; the arithmetic below needs a
    # number.
    offset = offset or 0

    filename = os.path.basename(file_path)
    local_dirs = os.path.dirname(local_file_path)
    if local_dirs:
        os.makedirs(local_dirs, exist_ok=True)

    if size is None:
        size = ftp.size(file_path)

    BAR_WIDTH = 50
    if size is not None:
        progress_text = "\r{percent:^4.0%}[{bar_done:"
        progress_text += str(BAR_WIDTH) + "}] {bytes:"
        # Width of the byte counter: the digits of the total size, widened by
        # roughly a third because the counter is printed with "," grouping.
        size_len = len(str(size))
        progress_text += str(int(1 + (size_len - 1) * 4 / 3))
        progress_text += ",d} {rate:>8}/s {eta:11}"
    else:
        progress_text = "\rRetrieved {bytes} bytes {rate:>6}/s {eta:11}"

    print("Downloading {} ({} bytes)".format(
        filename, "unknown" if size is None else size))

    begin_tsp = time.time()
    bytes_sofar = 0

    def get_writer(local_file):
        # State of the transfer rate estimate: bytes and time at the last
        # sample, and the rate measured then.
        sample_bytes = 0
        sample_tsp = begin_tsp
        sample_rate = 0

        def writer(data):
            nonlocal bytes_sofar, sample_bytes, sample_tsp, sample_rate

            local_file.write(data)
            bytes_sofar += len(data)
            percent_done = 0

            now_tsp = time.time()
            timediff = abs(now_tsp - sample_tsp)
            bytediff = bytes_sofar - sample_bytes
            if timediff >= 1 and bytediff > 0:
                sample_tsp, sample_bytes = now_tsp, bytes_sofar
                sample_rate = bytediff / timediff
            elif bytediff == 0 and timediff >= 2:
                sample_tsp, sample_bytes = now_tsp, bytes_sofar
                sample_rate = 0
            # else timediff too small or bytediff zero

            rate_str = format_bytes(int(sample_rate))
            eta_str = "eta unknown"
            if size is not None and bytes_sofar + offset <= size:
                percent_done = 1.0 * (bytes_sofar + offset) / size
                bytes_left = size - offset - bytes_sofar

                if sample_rate > 0:
                    eta_sec = bytes_left / sample_rate
                    eta_str = "eta " + format_time(math.ceil(eta_sec))

            print(progress_text.format(
                percent=percent_done,
                bytes=offset + bytes_sofar,
                bar_done="=" * int((BAR_WIDTH - 1) * percent_done) + ">",
                rate=rate_str,
                eta=eta_str), end="")
        return writer

    # Append when resuming, otherwise start with an empty file.
    write_mode = "ab" if offset else "wb"
    with open(local_file_path, write_mode) as local_file:
        ftp.retrbinary("RETR " + file_path, get_writer(local_file),
                       rest=offset if offset else None)

    duration = time.time() - begin_tsp
    # A transfer can finish within the timer resolution; avoid dividing by 0.
    average_rate = bytes_sofar / duration if duration > 0 else 0
    rate_str = format_bytes(int(average_rate))
    print(progress_text.format(
        percent=1.00,
        bytes=offset + bytes_sofar,
        bar_done="=" * (BAR_WIDTH - 1) + ">",
        rate=rate_str,
        eta="in " + format_time(math.ceil(duration))))

    # adjust modification times (server returns UTC)
    timeval = ftp.sendcmd("MDTM " + file_path)[4:]
    mtime = dt_from_ftp(timeval).timestamp()
    os.utime(local_file_path, times=(mtime, mtime))
+
def reget_file(ftp, file_path, local_file_path):
    """Download *file_path*, resuming a partial local copy if there is one.

    The size of *local_file_path* is compared with the remote size:

    - local copy larger than the remote file: report it and do nothing;
    - same size: assume the download is complete and do nothing;
    - smaller but non-empty: resume after the bytes already present;
    - missing or empty, or remote size unknown: download from the start.
    """
    # 0 means "start at the beginning" (no REST(art) command is sent then).
    offset = 0
    file_size = ftp.size(file_path)
    if file_size is not None:
        try:
            local_size = os.path.getsize(local_file_path)
        except OSError:
            # No usable local copy yet: download the whole file.
            pass
        else:
            if local_size > file_size:
                print("Local size %d is larger than remote %d"
                      % (local_size, file_size))
                return
            elif local_size == file_size:
                # assume fully downloaded. Maybe check mtime?
                print("Already completed:", file_path)
                return
            elif local_size > 0:
                offset = local_size
                print("Downloading {0:d} remaining bytes of {1}".format(
                    file_size - offset, file_path))

    download_file(ftp, file_path, local_file_path,
                  size=file_size, offset=offset)
+
# Known shell commands. Currently, the values indicate the allowed argument
# count.
_NO_ARGS = (0,)
_ONE_ARG = (1,)
_OPTIONAL_ARG = (0, 1)
_ONE_OR_TWO_ARGS = (1, 2)

cmds = {
    "pwd": _NO_ARGS,
    "dir": _OPTIONAL_ARG,
    "cd": _ONE_ARG,
    "ls": _OPTIONAL_ARG,
    "mlsd": _OPTIONAL_ARG,
    "get": _ONE_OR_TWO_ARGS,  # TODO: 2 is not implemented yet
    "reget": _ONE_OR_TWO_ARGS,
    "lcd": _ONE_ARG,
    "rhelp": _NO_ARGS,
    "help": _NO_ARGS,
    "bye": _NO_ARGS,
    "quit": _NO_ARGS,
    "!": _OPTIONAL_ARG,
}
+
# Interactive shell: read a command line, validate it against cmds and
# execute it until the user quits or stdin is closed.
with FTP() as ftp:
    ftp.connect(host, port)
    ftp.login(user, passwd)
    print(ftp.getwelcome())
    while True:
        try:
            line = input("ftp> ").strip()
        except KeyboardInterrupt:
            # Ctrl-C at the prompt only discards the current input.
            print("")
            continue
        except EOFError:
            # Ctrl-D / end of input: leave like "quit".
            print("")
            try:
                print(ftp.quit())
            except EOFError:
                pass
            break

        if line.startswith("!"):
            cmd, value = "!", line[1:].strip()
        else:
            # Everything after the command word is a single argument, so
            # remote names containing spaces work.
            cmd, _, value = line.partition(" ")
            value = value.strip()
        if not cmd:
            continue
        if cmd not in cmds:
            print("?Invalid command")
            continue
        if value == "" and 0 not in cmds[cmd]:
            print("Missing argument for", cmd)
            continue

        try:
            if cmd == "pwd":
                print(ftp.pwd())
            elif cmd in ("dir", "ls"):
                ftp.dir(value)
            elif cmd == "cd":
                print(ftp.cwd(value))
            elif cmd == "mlsd":
                for name, facts in ftp.mlsd(value):
                    if name not in (".", ".."):
                        print(format_mlsd(name, facts))
            elif cmd == "get":
                save_file = os.path.basename(value)
                download_file(ftp, value, save_file)
            elif cmd == "reget":
                save_file = os.path.basename(value)
                reget_file(ftp, value, save_file)
            elif cmd == "lcd":
                # Purely local operation: report failures as local errors.
                try:
                    os.chdir(value)
                except OSError as e:
                    print("ftp: local:", e)
                else:
                    print("Local directory is now", os.getcwd())
            elif cmd == "rhelp":
                print(ftp.sendcmd("help"))
            elif cmd == "help":
                print("Commands are: " + " ".join(cmds.keys()))
            elif cmd == "!":
                if value:
                    subprocess.call(value, shell=True)
                else:
                    shell = os.getenv("SHELL")
                    shell = shell if shell else "sh"
                    subprocess.call([shell])
            elif cmd in ("bye", "quit"):
                try:
                    print(ftp.quit())
                except EOFError:
                    pass
                break
        except KeyboardInterrupt:
            # The command was interrupted; probe the control connection to
            # find out what state it is in.
            print("")
            try:
                ftp.voidcmd("PWD")
            except EOFError:
                print("Not connected.")
            except all_errors as e:
                # assume PWD is always possible and that any errors must
                # result from the previous operation
                print("Error:", e)
                # discard good PWD response
                ftp.voidresp()
        except all_errors as e:
            # all_errors already contains OSError, so local file errors
            # raised by get/reget are reported here as well.
            print("Error:", e)