--- es9.py- 2016-09-01 04:00:00.000000000 +0900 +++ es9.py 2016-09-01 23:00:00.000000000 +0900 @@ -2,19 +2,14 @@ import sys -def enc(idx): - return '@{:02d}'.format(idx) - def encode(s, tbl): - f = lambda t, s1: t.replace( s1, enc( tbl.index(s1) ) ) - kf = lambda s1: len(s1) - stbl = sorted(tbl, key=kf, reverse=True) - return reduce(f, stbl, s) + f = lambda t, (k, s1, d1, ec): t.replace(s1, ec) + return reduce(f, tbl, s) def decode(s, tbl, add_spc=False): add = ' ' if add_spc else '' - f = lambda t, idx: t.replace( enc(idx), add + tbl[idx] + add ) - return reduce(f, range( len(tbl) ), s) + f = lambda t, (k, s1, d1, ec): t.replace( ec, add + d1 + add ) + return reduce(f, tbl, s) def fidx(s, lst): idxs = [ s.index(t) for t in lst if t in s ] @@ -31,34 +26,58 @@ def idxs(s, sta, end): j2 = j + len(end) return (i, j2) -def div_str_cut_comment(s, tbl): - targs = [ - [ 'str', '"', '"' ], - [ 'comment', '/*', '*/' ] - ] - targs = [ [ e[0] ] + [ enc( tbl.index(s1) ) for s1 in e[1:] ] for e in targs ] +def div_str_cut_comment(s, kdic, tbl): + targs = [ [ 'str', s1, s1 ] for s1 in kdic.get('qt') ] + + cmt_s = kdic.get('cmt_s') + cmt_e = kdic.get('cmt_e') + f_e = lambda idx: cmt_e[idx] if idx < len(cmt_e) else '\n' + targs += [ [ 'cmt', s1, f_e(idx) ] for (idx, s1) in enumerate(cmt_s) ] + + targs = [ [ e[0] ] + [ encode(s1, tbl) for s1 in e[1:] ] for e in targs ] + i = fidx( s, zip(*targs)[1] ) if i is None: return [ [ None, s ] ] if i > 0: - return [ [ None, s[:i] ] ] + div_str_cut_comment( s[i:], tbl ) + return [ [ None, s[:i] ] ] + div_str_cut_comment( s[i:], kdic, tbl ) (k, sta, end) = (None, None, None) (k, sta, end) = next( ( e for e in targs if s.startswith( e[1] ) ), (k, sta, end) ) (_, j) = idxs(s, sta, end) - - d = { '\\"' : '"', '\\n' : '\n', '\\t' : '\t', '\\\\' : '\\' } - tbl_dec_str = [ d.get(s1) if s1 in d else s1 for s1 in tbl ] - r = [ [ 'str', decode( s[:j], tbl_dec_str ) ] ] if k == 'str' else [] - return r + div_str_cut_comment( s[j:], tbl ) + r = [ [ 'str', decode( s[:j], tbl ) ] ] if k == 'str' else [] + return r + div_str_cut_comment( s[j:], kdic, tbl ) def es_split(s): s = s.replace('@', '@ ') - tbl = [ '/*', '*/', '(', ')', ';', ',', '*', '"', ' ', '\t', '\n', '\\"', '\\n', '\\t' '\\\\' ] + kdic = { + 'cmt_s' : [ '/*', '//' ], + 'cmt_e' : [ '*/' ], + 'qt' : [ '"', "'" ], + 'esc' : { '\\n':'\n', '\\t':'\t', '\\"':'"', "\\'":"'", '\\\\':'\\' }, + 'br_s' : [ '(', '[', '{' ], + 'br_e' : [ ')', ']', '}' ], + 'op' : [ '+', '-', '*' ], + 'spc' : [ ' ', '\t', '\n' ], + 'etc' : [ ';', ',' ], + } + + enc = lambda ki, idx: '@{:02d}{:02d}'.format(ki, idx) + + tbl = [] + for (ki, (k, v)) in enumerate( kdic.items() ): + items = v.items() if type(v) is dict else zip(v, v) + for (idx, (s1, d1)) in enumerate(items): + tbl += [ [ k, s1, d1, enc(ki, idx) ] ] + + kf = lambda (k, s1, d1, ec): len(s1) + tbl = sorted(tbl, key=kf, reverse=True) + print('tbl:\n{}\n'.format(tbl)) + s = encode(s, tbl) print('encode:\n{}\n'.format(s)) - lst = div_str_cut_comment(s, tbl) + lst = div_str_cut_comment(s, kdic, tbl) print('div_str:\n{}\n'.format(lst)) f = lambda e: decode(e[1], tbl, add_spc=True).split() if e[0] is None else [ e[1] ]