1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
#!/usr/bin/python
# Retrieve a list of files from one server. Saves modification time and supports
# download resumption.
#
# Copyright (C) 2013 Peter Wu <lekensteyn@gmail.com>
from __future__ import print_function, division
import sys
from ftplib import FTP
import re
import os, os.path
from datetime import datetime, timezone
# Connection settings. Each of them may be replaced by the matching component
# of the URL given on the command line; `host` has no default.
user, passwd = "anonymous", "anon"
host, port = None, 21
# Remote base directory; names read from stdin are appended to it.
path = "/"
# Local directory under which downloaded files are stored.
outdir = os.getcwd()

# Accepted URL syntax: [ftp://][user[:pass]@]host[:port][/path]
# The pattern is compiled in verbose mode, so the line breaks inside the
# literal only separate its parts and are not matched.
_URL_SYNTAX = r"""
^
(?:ftp://)?
(?:
(?P<user>.+?)
(?:
:(?P<passwd>.+?)
)?
@
)?
(?P<host>[a-zA-Z0-9.-]+)
(:(?P<port>\d+))?
(?P<path>/.*)?
$
"""
patt_url = re.compile(_URL_SYNTAX, flags=re.VERBOSE)
# Take the connection settings from the URL in the first command-line
# argument, when one is given and it matches patt_url. Components that are
# absent from the URL leave the corresponding default untouched.
url_match = patt_url.match(sys.argv[1]) if len(sys.argv) > 1 else None
if url_match is not None:
    url_parts = url_match.groupdict()
    host = url_parts["host"]
    if url_parts["user"] is not None:
        user = url_parts["user"]
    if url_parts["passwd"] is not None:
        passwd = url_parts["passwd"]
    if url_parts["port"] is not None:
        port = int(url_parts["port"])
    if url_parts["path"] is not None:
        path = url_parts["path"]
    # File names are appended directly to the path later on, so it has to
    # end with a slash.
    if not path.endswith("/"):
        print("Path must be a directory", file=sys.stderr)
        sys.exit(1)

# No host means no argument was given or the argument did not parse as a
# URL: show the usage text and stop.
if host is None:
    usage = "[ftp://][user[:pass]@]ftp.example.com[:21][/path]"
    print("Usage: python", sys.argv[0], usage, file=sys.stderr)
    print("A file list is read from stdin", file=sys.stderr)
    sys.exit(1)
# Build the work list: one (remote path, local path) pair per line of stdin.
files = []
for line in sys.stdin:
    # Leading slashes are dropped so every name is taken relative to the
    # remote base path and to the local output directory.
    file_path = line.rstrip("\r\n").lstrip("/")
    if not file_path:
        # A blank line (or a lone "/") names no file; queueing it would
        # request the remote base directory itself as if it were a file.
        continue
    # note: os.path.join(outdir, file_path) throws away outdir if file_path
    # is absolute. That is *not* intended here.
    local_file_path = outdir + "/" + file_path
    files.append((path + file_path, local_file_path))

# An empty list is treated as an error so callers notice the missing input.
if not files:
    print("Nothing to do", file=sys.stderr)
    sys.exit(1)
# Download every queued file over a single FTP session. A partially
# downloaded local file is resumed, a complete one is skipped, and after each
# transfer the server's modification time is copied onto the local file.
with FTP() as ftp:
    ftp.connect(host, port)
    ftp.login(user, passwd)
    print(ftp.getwelcome(), file=sys.stderr)
    # Select binary mode before the first SIZE: some servers refuse SIZE
    # while the connection is still in ASCII mode. retrbinary() selects
    # binary mode itself, so this does not alter the transfers.
    ftp.voidcmd("TYPE I")

    total = len(files)
    for current, (file_path, local_file_path) in enumerate(files, 1):
        print("Processing %d/%d: %s" % (current, total, file_path))
        # file_path already includes the base path taken from the URL.
        remote_file_path = file_path

        # do not send a REST(art) command when starting at the beginning
        offset = None
        file_size = ftp.size(remote_file_path)
        if file_size is not None:
            try:
                local_size = os.path.getsize(local_file_path)
            except OSError:
                # No usable local copy: download from the start.
                local_size = None

            if local_size is not None:
                if local_size > file_size:
                    print("Local size %d is larger than remote %d, restarting %s"
                          % (local_size, file_size, file_path))
                elif local_size == file_size:
                    # assume fully downloaded. Maybe check mtime?
                    print("Already completed:", file_path)
                    continue
                elif local_size > 0:
                    offset = local_size
                    print("Downloading %d remaining bytes of %s"
                          % (file_size - offset, file_path))

        os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
        # Append only when resuming. A fresh download or a restart must
        # truncate, otherwise the new data would be added after stale bytes.
        mode = "ab" if offset is not None else "wb"
        with open(local_file_path, mode) as local_file:
            ftp.retrbinary("RETR " + remote_file_path, local_file.write,
                           rest=offset)

        # adjust modification times (server returns UTC)
        mdtm_reply = ftp.sendcmd("MDTM " + remote_file_path)
        # The reply is "213 YYYYMMDDHHMMSS" with an optional ".fraction";
        # strip the status code and ignore any fractional seconds.
        timeval = mdtm_reply[4:].strip().split(".")[0]
        mtime = (datetime.strptime(timeval, "%Y%m%d%H%M%S")
                 .replace(tzinfo=timezone.utc)
                 .timestamp())
        os.utime(local_file_path, times=(mtime, mtime))
|