#!/usr/bin/python
# Recursively list FTP directory contents
#
# Copyright (C) 2013 Peter Wu

# Prepend directory name to files and list files:
# awk '/^Directory/{if($0 != "Directory /")dir=substr($0, index($0, "/"))}/^-/{n=index($0,$9);print substr($0,1, n - 1) dir "/" substr($0,n)}'

import sys
from ftplib import FTP, error_perm
import re

# Connection defaults; each one is overridden by the matching part of the
# URL given on the command line, when present.
user = "anonymous"
passwd = "anon"
host = None
port = 21
path = "/"

# [ftp://][user[:pass]@]host[:port][/path]
patt_url = re.compile(r"""
^
(?:ftp://)?
(?:
    (?P<user>.+?)
    (?:
        :(?P<passwd>.+?)
    )?
    @
)?
(?P<host>[a-zA-Z0-9.-]+)
(:(?P<port>\d+))?
(?P<path>/.*)?
$
""", re.VERBOSE)

# directories for which the contents needs to be retrieved
dirs = []

# One line of Unix "ls -l" style output as returned by LIST.
# Group 1 is the entry name, group 2 the symlink target (if any).
patt_list = re.compile(r"""
^
.[-rwxXsStT]{9}\ +          # permissions
\d+\ +                      # links count
\S+\ +                      # owner
\S+\ +                      # group
\d+\ +                      # size
\S{3}\ +                    # month
\d{1,2}\ +                  # day (may be space-padded, e.g. "Jan  1")
(?:\d{4,}|\d{1,2}:\d{2})    # year or time
\ (.+?)                     # name
(?:\ ->\ (.+))?             # symlink
[\r\n]*                     # trailing line terminators
$
""", re.VERBOSE)


def _parse_target(url):
    """Split *url* into ``(user, passwd, host, port, path)``.

    Parts missing from the URL fall back to the module-level defaults.
    Returns ``None`` when *url* does not match ``patt_url``.
    """
    m = patt_url.match(url)
    if not m:
        return None
    return (
        m.group("user") if m.group("user") is not None else user,
        m.group("passwd") if m.group("passwd") is not None else passwd,
        m.group("host"),
        int(m.group("port")) if m.group("port") is not None else port,
        m.group("path") if m.group("path") is not None else path,
    )


def get_ls_processor(cwd):
    """Return a LIST line callback for the directory *cwd*.

    The callback echoes every line to stdout and appends the full path of
    each sub-directory it recognises to the module-level ``dirs`` queue.
    """
    def process_dir(line):
        print(line)
        # startswith() instead of line[0] so an empty line is not an error.
        if not line.startswith('d'):
            return
        m = patt_list.match(line)
        if m is None:
            # Listing format we do not understand: report it, keep going.
            print("Cannot parse directory entry:", line, file=sys.stderr)
            return
        name = m.group(1)
        # Some servers list "." and ".."; queuing them would loop forever.
        if name in (".", ".."):
            return
        if cwd == "/":
            subdir = "/" + name
        else:
            subdir = cwd + "/" + name
        print("Queuing", subdir, file=sys.stderr)
        dirs.append(subdir)
    return process_dir


def main():
    """Parse the command line, connect, and list the tree depth-first."""
    target = _parse_target(sys.argv[1]) if len(sys.argv) >= 2 else None
    if target is None:
        print("Usage: python", sys.argv[0], "[ftp://][user[:pass]@]ftp.example.com[:21][/path]", file=sys.stderr)
        sys.exit(1)
    login_user, login_passwd, ftp_host, ftp_port, start_path = target

    dirs.append(start_path)
    with FTP() as ftp:
        ftp.connect(ftp_host, ftp_port)
        ftp.login(login_user, login_passwd)
        print(ftp.getwelcome(), file=sys.stderr)
        while dirs:
            current = dirs.pop()
            try:
                ftp.cwd(current)
                print("Directory", ftp.pwd())
                ftp.retrlines('LIST', get_ls_processor(current))
            except error_perm as err:
                # An unreadable directory should not abort the whole crawl.
                print("Skipping", current, "-", err, file=sys.stderr)
            print("Queue size:", len(dirs), file=sys.stderr)


if __name__ == "__main__":
    main()