summaryrefslogtreecommitdiff
path: root/ftp-get.py
blob: c7e6fab2200a5216dbe509df1f354177ecf074e9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/python
# Retrieve a list of files from one server. Saves modification time and supports
# download resumption.
#
# Copyright (C) 2013 Peter Wu <lekensteyn@gmail.com>

from __future__ import print_function, division

import sys
from ftplib import FTP
import re
import os, os.path
from datetime import datetime, timezone

# Connection settings. Everything except the host has a usable default and may
# be overridden by the URL passed as the first command-line argument.
user, passwd = "anonymous", "anon"
host, port = None, 21
path = "/"

# Local directory below which the retrieved files are stored.
outdir = os.getcwd()

# Matches [ftp://][user[:pass]@]host[:port][/path]
patt_url = re.compile(r"""
^
(?:ftp://)?
(?:
	(?P<user>.+?)
	(?:
		:(?P<passwd>.+?)
	)?
	@
)?
(?P<host>[a-zA-Z0-9.-]+)
(:(?P<port>\d+))?
(?P<path>/.*)?
$
""", re.VERBOSE)

# Only look at argv[1] when it was actually supplied.
url_match = patt_url.match(sys.argv[1]) if len(sys.argv) >= 2 else None
if url_match:
	# Groups that did not take part in the match are None and leave the
	# corresponding default untouched.
	url_parts = url_match.groupdict()
	host = url_parts["host"]
	if url_parts["user"] is not None:
		user = url_parts["user"]
	if url_parts["passwd"] is not None:
		passwd = url_parts["passwd"]
	if url_parts["port"] is not None:
		port = int(url_parts["port"])
	if url_parts["path"] is not None:
		path = url_parts["path"]
		# The path is used as a prefix for every file name, so it has to
		# name a directory.
		if not path.endswith("/"):
			print("Path must be a directory", file=sys.stderr)
			sys.exit(1)

# No URL given, or it could not be parsed: show usage and bail out.
if host is None:
	usage_syntax = "[ftp://][user[:pass]@]ftp.example.com[:21][/path]"
	print("Usage: python", sys.argv[0], usage_syntax, file=sys.stderr)
	print("A file list is read from stdin", file=sys.stderr)
	sys.exit(1)

# Build the work list: one (remote path, local path) pair per line on stdin.
files = []
for line in sys.stdin:
	file_path = line.rstrip("\r\n").lstrip("/")
	if not file_path:
		# A blank line (or a bare "/") names no file. Queuing it would
		# request the remote directory itself and target outdir + "/"
		# locally, which cannot be downloaded.
		continue
	# note: os.path.join(outdir, file_path) would throw away outdir if
	# file_path were absolute. That is *not* intended here.
	local_file_path = outdir + "/" + file_path
	files.append((path + file_path, local_file_path))

if not files:
	print("Nothing to do", file=sys.stderr)
	sys.exit(1)

# Download every queued file over a single FTP session, resuming partial
# downloads where possible and copying the server's modification time.
with FTP() as ftp:
	ftp.connect(host, port)
	ftp.login(user, passwd)
	print(ftp.getwelcome(), file=sys.stderr)

	# Some servers refuse SIZE while in ASCII mode. retrbinary() switches to
	# binary anyway, so do it up front to cover the very first size() call.
	ftp.voidcmd("TYPE I")

	total = len(files)

	for current, (file_path, local_file_path) in enumerate(files, 1):
		print("Processing %d/%d: %s" % (current, total, file_path))

		# The queued remote path already carries the URL's path prefix.
		remote_file_path = file_path

		# do not send a REST(art) command when starting at the beginning
		offset = None
		file_size = ftp.size(remote_file_path)

		try:
			local_size = os.path.getsize(local_file_path)
		except OSError:
			# No local copy (or it cannot be inspected): plain download.
			local_size = None

		if file_size is not None and local_size is not None:
			if local_size > file_size:
				print("Local size %d is larger than remote %d, restarting %s" % (local_size, file_size, file_path))
			elif local_size == file_size:
				# assume fully downloaded. Maybe check mtime?
				print("Already completed:", file_path)
				continue
			elif local_size > 0:
				offset = local_size
				print("Downloading %d remaining bytes of %s" % (file_size - offset, file_path))

		os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
		# Append only when resuming. A restart (or a download whose remote
		# size is unknown) must truncate, otherwise the fresh data would be
		# appended to whatever stale bytes are already in the local file.
		mode = "ab" if offset else "wb"
		with open(local_file_path, mode) as local_file:
			ftp.retrbinary("RETR " + remote_file_path, local_file.write, rest=offset)

		# adjust modification times (server returns UTC)
		# The reply looks like "213 YYYYMMDDHHMMSS"; skip the status code.
		timeval = ftp.sendcmd("MDTM " + remote_file_path)[4:].strip()
		# RFC 3659 permits a fractional part ("YYYYMMDDHHMMSS.sss"); drop it.
		timeval = timeval.split(".", 1)[0]
		mtime = datetime.strptime(timeval, "%Y%m%d%H%M%S").replace(tzinfo=timezone.utc).timestamp()
		os.utime(local_file_path, times=(mtime, mtime))