summaryrefslogtreecommitdiff
path: root/ftp-get.py
diff options
context:
space:
mode:
authorPeter Wu <lekensteyn@gmail.com>2013-06-26 20:00:08 +0200
committerPeter Wu <lekensteyn@gmail.com>2013-06-26 20:00:08 +0200
commiteb5736156bdb736f8197d92fc83b799586ae492e (patch)
treefceeab2428b1935c520c2c92bb22f03126b836a3 /ftp-get.py
parent82c07bc02cafe9b8d6cb9fa2362b2ba6326d6a7b (diff)
downloadscripts-eb5736156bdb736f8197d92fc83b799586ae492e.tar.gz
ftp*.py: add FTP scripts
- ftp-get.py: sequentially download a list of files with one connection. - ftp-list.py: recursively show directory contents. - ftp.py: supports "MLSD" which is missing in inetutils.
Diffstat (limited to 'ftp-get.py')
-rwxr-xr-xftp-get.py110
1 files changed, 110 insertions, 0 deletions
diff --git a/ftp-get.py b/ftp-get.py
new file mode 100755
index 0000000..ed47402
--- /dev/null
+++ b/ftp-get.py
@@ -0,0 +1,110 @@
#!/usr/bin/env python3
"""Retrieve a list of files from one FTP server over a single connection.

The server is given on the command line as
``[ftp://][user[:pass]@]ftp.example.com[:21][/path]``; the remote file paths
are read from standard input, one per line.  Each file is stored below the
current working directory under the same relative path.  The script saves the
remote modification time and supports download resumption.
"""
# Copyright (C) 2013 Peter Wu <lekensteyn@gmail.com>

import sys
from ftplib import FTP
import re
import os, os.path
from datetime import datetime, timezone

# Credentials and port used when the command-line argument does not give them.
DEFAULT_USER = "anonymous"
DEFAULT_PASSWD = "anon"
DEFAULT_PORT = 21

patt_url = re.compile(r"""
^
(?:ftp://)?
(?:
    (?P<user>.+?)
    (?:
        :(?P<passwd>.+?)
    )?
    @
)?
(?P<host>[a-zA-Z0-9.-]+)
(:(?P<port>\d+))?
(?P<path>/.*)?
$
""", re.VERBOSE)


def parse_target(arg):
    """Split a server argument into its connection parameters.

    Returns a ``(user, passwd, host, port, path)`` tuple, or ``None`` when
    *arg* does not match ``patt_url``.  Missing user, password and port fall
    back to the ``DEFAULT_*`` constants; *path* is ``None`` when the argument
    has no path component.
    """
    m = patt_url.match(arg)
    if m is None:
        return None

    user = m.group("user")
    passwd = m.group("passwd")
    port = m.group("port")
    return (
        DEFAULT_USER if user is None else user,
        DEFAULT_PASSWD if passwd is None else passwd,
        m.group("host"),
        DEFAULT_PORT if port is None else int(port),
        m.group("path"),
    )


def read_file_list(lines, outdir):
    """Turn an iterable of text lines into ``(remote, local)`` path pairs.

    Line terminators are stripped and blank lines are skipped, because an
    empty name cannot refer to a remote file.
    """
    files = []
    for line in lines:
        file_path = line.rstrip("\r\n")
        if not file_path:
            continue
        # note: os.path.join(outdir, file_path) throws away outdir if
        # file_path is absolute. That is *not* intended here.
        files.append((file_path, outdir + "/" + file_path))
    return files


def plan_transfer(file_path, local_size, remote_size):
    """Decide how one file has to be fetched.

    *local_size* is the size of the existing local copy, or ``None`` when
    there is none; *remote_size* is the size reported by the server, or
    ``None`` when it is unknown.

    Returns ``(skip, offset, message)``: *skip* is true when the file is
    already complete, *offset* is the byte position to resume from (``None``
    means "start at the beginning") and *message* is a progress line to print
    (``None`` when there is nothing to report).
    """
    if local_size is None or remote_size is None:
        return False, None, None

    if local_size > remote_size:
        return False, None, (
            "Local size %d is larger than remote %d, restarting %s"
            % (local_size, remote_size, file_path))

    if local_size == remote_size:
        # assume fully downloaded. Maybe check mtime?
        return True, None, "Already completed: " + file_path

    if local_size > 0:
        # The arguments must be one tuple: "%" binds tighter than "-".
        return False, local_size, (
            "Downloading %d remaining bytes of %s"
            % (remote_size - local_size, file_path))

    return False, None, None


def parse_mdtm(response):
    """Convert an ``MDTM`` reply such as ``213 20130626180008`` to a timestamp.

    The time value is interpreted as UTC.  An optional fractional part
    (``YYYYMMDDHHMMSS.sss``) is added to the result.
    """
    timeval = response[4:].strip()
    stamp, _, fraction = timeval.partition(".")
    moment = datetime.strptime(stamp, "%Y%m%d%H%M%S")
    mtime = moment.replace(tzinfo=timezone.utc).timestamp()
    if fraction:
        mtime += float("0." + fraction)
    return mtime


def fetch_file(ftp, file_path, local_file_path):
    """Download *file_path* through *ftp* into *local_file_path*.

    An incomplete local copy is resumed, a complete one is skipped, and in
    every other case the local file is (re)written from the beginning.  After
    the transfer the local modification time is set from the ``MDTM`` reply.
    """
    # TODO: do something with path parameter as passed in a URL?
    remote_file_path = file_path

    local_size = None
    remote_size = ftp.size(remote_file_path)
    if remote_size is not None:
        try:
            local_size = os.path.getsize(local_file_path)
        except OSError:
            pass  # no usable local copy; download from the beginning

    skip, offset, message = plan_transfer(file_path, local_size, remote_size)
    if message is not None:
        print(message)
    if skip:
        return

    os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
    # Append only when resuming; otherwise truncate so that a restart does not
    # add the new data behind stale content.
    # do not send a REST(art) command when starting at the beginning
    mode = "wb" if offset is None else "ab"
    with open(local_file_path, mode) as local_file:
        ftp.retrbinary("RETR " + remote_file_path, local_file.write,
                       rest=offset)

    # adjust modification times (server returns UTC)
    mtime = parse_mdtm(ftp.sendcmd("MDTM " + remote_file_path))
    os.utime(local_file_path, times=(mtime, mtime))


def main(argv=None):
    """Run the downloader and return the process exit status."""
    if argv is None:
        argv = sys.argv

    target = parse_target(argv[1]) if len(argv) >= 2 else None
    if target is None:
        print("Usage: python", argv[0],
              "[ftp://][user[:pass]@]ftp.example.com[:21][/path]",
              file=sys.stderr)
        print("A file list is read from stdin", file=sys.stderr)
        return 1

    user, passwd, host, port, path = target
    if path is not None:
        print("Warning: path component is ignored", file=sys.stderr)

    files = read_file_list(sys.stdin, os.getcwd())
    if not files:
        print("Nothing to do", file=sys.stderr)
        return 1

    with FTP() as ftp:
        ftp.connect(host, port)
        ftp.login(user, passwd)
        print(ftp.getwelcome(), file=sys.stderr)

        total = len(files)
        for current, (file_path, local_file_path) in enumerate(files, 1):
            print("Processing %d/%d: %s" % (current, total, file_path))
            fetch_file(ftp, file_path, local_file_path)

    return 0


if __name__ == "__main__":
    sys.exit(main())