#!/usr/bin/env python
"""Incremental directory backup.

Usage: <script> srcdir dstdir [bk.yaml]

Regular files and symlinks found under ``srcdir`` are copied with ``cp -p``
into ``dstdir`` when they are missing there or differ in size / mtime (or in
symlink target).  When ``bk.yaml`` is given, the scan results are cached in
that YAML file and reused on the next run.

The code is written to run unchanged on Python 2.7 and Python 3.
"""

import os
import subprocess
import sys

# Two mtimes closer than this many seconds are considered equal
# (FAT timestamps only have a 2 second resolution).
MTIME_TOLERANCE = 4.0


def exit(msg='', code=0):
    """Print ``msg`` and terminate the process with exit status ``code``.

    NOTE: the name shadows the ``exit`` builtin; it is kept because it is
    this file's established helper name.
    """
    print(msg)
    sys.exit(code)


def exec_cmd(cmd, out=True):
    """Run an external command.

    cmd -- an argv list/tuple (executed directly, no shell, so no quoting
           problems) or a string (executed through the shell; the caller is
           then responsible for quoting).
    out -- when true, return the command's stdout as text and raise
           ``subprocess.CalledProcessError`` on a non-zero exit status;
           when false, run best-effort and return ``None``.
    """
    use_shell = not isinstance(cmd, (list, tuple))
    if out:
        return subprocess.check_output(
            cmd, shell=use_shell, universal_newlines=True)
    subprocess.call(cmd, shell=use_shell)


def mkdir_if_not(path):
    """Create directory ``path`` (with parents) unless it already exists."""
    if not os.path.exists(path):
        exec_cmd(['mkdir', '-p', path], False)


def yaml_load(fname, def_val):
    """Return the content of YAML file ``fname``.

    ``def_val`` is returned when the file does not exist or is empty.
    """
    if not os.path.exists(fname):
        return def_val
    # Imported lazily so PyYAML is only required when a database file is used.
    import yaml
    with open(fname, 'r') as f:
        # An explicit Loader is mandatory on PyYAML >= 6.  yaml.Loader keeps
        # the original behaviour and can read files written by yaml.dump.
        # NOTE(security): this loader can construct arbitrary Python objects;
        # only point this tool at database files you trust.
        loaded = yaml.load(f, Loader=yaml.Loader)
    return def_val if loaded is None else loaded


def file_write(fname, s):
    """Write string ``s`` to file ``fname``; ``'-'`` means stdout."""
    if fname == '-':
        sys.stdout.write(s)
        return
    with open(fname, 'w') as f:
        f.write(s)


def yaml_save(fname, v, default_flow_style=False):
    """Serialize ``v`` as YAML into ``fname`` (``'-'`` means stdout)."""
    import yaml  # lazy, see yaml_load
    file_write(fname, yaml.dump(v, default_flow_style=default_flow_style))


def get_info(path):
    """Return the comparison record for ``path``.

    Symlinks yield ``{'realpath': ...}``; anything else yields
    ``{'size': ..., 'mtime': ...}``.
    """
    if os.path.islink(path):
        return {'realpath': os.path.realpath(path)}
    return {
        'size': os.path.getsize(path),
        'mtime': os.path.getmtime(path),
    }


def db_sz(db, p):
    """Return the recorded size of entry ``p`` in ``db`` (0 when unknown)."""
    return db.get(p, {}).get('size', 0)


def add_db_if_not(db, path):
    """Scan ``path`` with ``find`` and store the result in ``db[path]``.

    The stored value maps each file's path relative to ``path`` to its
    ``get_info`` record.  Nothing is stored when no file or symlink is found.

    NOTE(review): a path that is already a key of ``db`` (e.g. loaded from
    the YAML cache) is trusted as-is and not rescanned.
    """
    if path in db:
        return
    # The parentheses are required so that -print0 applies to both tests;
    # NUL separators keep file names containing newlines intact.
    cmd = ['find', path, '(', '-type', 'f', '-o', '-type', 'l', ')',
           '-print0']
    found = [p for p in exec_cmd(cmd).split('\0') if p]
    if not found:
        return
    db[path] = dict((os.path.relpath(p, path), get_info(p)) for p in found)


def is_same(a, b, tolerance=MTIME_TOLERANCE):
    """Tell whether two ``get_info`` records describe the same content.

    Symlink records are equal when their targets are equal; a symlink never
    equals a regular file.  File records are equal when the sizes match and
    the mtimes differ by less than ``tolerance`` seconds.
    """
    k = 'realpath'
    # Parenthesized on purpose: ``k in a != k in b`` is a chained comparison
    # and does not test what it appears to test.
    if (k in a) != (k in b):
        return False
    if k in a:
        return a.get(k) == b.get(k)
    if a.get('size') != b.get('size'):
        return False
    mtime_a = a.get('mtime')
    mtime_b = b.get('mtime')
    if mtime_a is None or mtime_b is None:
        return False
    return abs(mtime_a - mtime_b) < tolerance


def cp_rm_lst(db, s, d):
    """Work out what has to be copied from ``s`` to ``d``.

    Returns ``(lst, rm_lst)``: ``lst`` holds the relative paths that are
    missing or different in the destination, ``rm_lst`` the subset of
    ``lst`` that already exists there and must be removed first.  Both lists
    are sorted.
    """
    db_s = db.get(s, {})
    db_d = db.get(d, {})
    lst = sorted(
        p for p, info in db_s.items()
        if p not in db_d or not is_same(db_d.get(p), info)
    )
    rm_lst = [p for p in lst if p in db_d]
    return (lst, rm_lst)


def get_disk_free(path):
    """Return the bytes available to the current user on ``path``'s volume."""
    # statvfs avoids parsing df output, which breaks on wrapped lines and on
    # device names that contain spaces.
    st = os.statvfs(path)
    return st.f_bavail * st.f_frsize


def kmgt_str(v):
    """Format byte count ``v`` with a K/M/G/T suffix.

    The value is divided by 1024 (integer division) while it is at least
    10240, so results keep up to five digits.
    """
    ut = 'KMGT'
    i = 0
    while v >= 1024 * 10 and i < len(ut):
        v //= 1024
        i += 1
    return str(v) + (ut[i - 1] if i > 0 else '')


def rm(p, d, db_d, rm_lst):
    """Delete ``p`` below ``d`` and return its recorded size.

    ``p`` is also dropped from ``rm_lst`` (if present) and from ``db_d``,
    because the destination no longer holds it.
    """
    sz = db_sz(db_d, p)
    exec_cmd(['rm', '-f', os.path.join(d, p)], False)
    if p in rm_lst:
        rm_lst.remove(p)
    db_d.pop(p, None)
    return sz


def cp(p, s, d, db_s):
    """Copy ``p`` from below ``s`` to below ``d``; return its recorded size.

    The destination directory is created when needed.  The copy itself is
    best-effort: ``cp`` reports its own errors.
    """
    dst_dir = os.path.join(d, os.path.dirname(p))
    mkdir_if_not(dst_dir)
    exec_cmd(['cp', '-p', os.path.join(s, p), dst_dir], False)
    return db_sz(db_s, p)


def _is_copied(path, info):
    """Tell whether ``path`` exists and has the size recorded in ``info``."""
    if not os.path.exists(path):
        return False
    size = info.get('size')
    return size is None or os.path.getsize(path) == size


def backup(db, s, d, lst, rm_lst):
    """Copy every path of ``lst`` from ``s`` to ``d``.

    db     -- scan database; ``db[d]`` is created if missing and updated
              after each verified copy.
    lst    -- relative paths to copy (left untouched).
    rm_lst -- paths of ``lst`` already present in ``d``; drained as the old
              copies are removed.

    Exits with status 2 when the destination cannot hold the data.
    """
    db_s = db.get(s, {})
    # setdefault keeps the destination record attached to db even when the
    # destination had no entry yet.
    db_d = db.setdefault(d, {})

    add_sz = sum(db_sz(db_s, p) for p in lst)
    rm_sz = sum(db_sz(db_d, p) for p in rm_lst)
    dst_free = get_disk_free(d)
    over = add_sz - rm_sz - dst_free
    if over > 0:
        exit('No disk space, over ' + kmgt_str(over), 2)

    cp_sz = 0
    for p in list(lst):
        if p in rm_lst:
            dst_free += rm(p, d, db_d, rm_lst)
        sz = db_sz(db_s, p)
        # Make room by removing stale destination files that are going to be
        # replaced later anyway; rm() drops them from rm_lst.
        while sz > dst_free and rm_lst:
            dst_free += rm(rm_lst[-1], d, db_d, rm_lst)
        sz = cp(p, s, d, db_s)
        info = db_s.get(p, {})
        if not _is_copied(os.path.join(d, p), info):
            # Do not record a copy that cannot be confirmed, so that it is
            # retried on the next run.
            sys.stderr.write(
                'not recorded (copy failed or source changed): '
                + p + '\n')
            continue
        dst_free -= sz
        cp_sz += sz
        db_d[p] = dict(info)
        print(kmgt_str(cp_sz) + ' / ' + kmgt_str(add_sz)
              + ' : ' + p + ' / ' + kmgt_str(sz))


def _cut_tail(path):
    """Strip trailing slashes from ``path`` but keep a lone root ``/``."""
    stripped = path.rstrip('/')
    return stripped if stripped else path[:1]


def main(argv):
    """Command line entry point; ``argv`` is ``sys.argv``."""
    if len(argv) < 3:
        exit('Usage {} srcdir dstdir [bk.yaml]'.format(argv[0]))

    s = _cut_tail(argv[1])
    if not os.path.exists(s):
        exit('Not found ' + s, 1)

    d = _cut_tail(argv[2])
    mkdir_if_not(d)

    fn = argv[3] if len(argv) > 3 else ''
    db = yaml_load(fn, {}) if fn else {}

    add_db_if_not(db, s)
    add_db_if_not(db, d)

    (lst, rm_lst) = cp_rm_lst(db, s, d)
    if not lst:
        exit('already backuped')

    backup(db, s, d, lst, rm_lst)

    if fn:
        yaml_save(fn, db)


if __name__ == "__main__":
    main(sys.argv)
# EOF