--- es6.py- 2016-09-01 01:00:00.000000000 +0900 +++ es6.py 2016-09-01 03:00:00.000000000 +0900 @@ -14,41 +14,44 @@ f = lambda t, s1: t.replace( enc( tbl.index(s1) ), add + s1 + add) return reduce(f, tbl, s) -def div_str(s, tbl): - qt = '"' - enc_qt = enc( tbl.index(qt) ) - if enc_qt not in s: - return [ [ None, s ] ] - i = s.index(enc_qt) - if i > 0: - return [ [ None, s[:i] ] ] + div_str( s[i:], tbl ) - n = len(enc_qt) - j = n + s[n:].index(enc_qt) + n if enc_qt in s[n:] else len(s) - qs = decode( s[:j], tbl ) - return [ [ 'str', qs ] ] + div_str( s[j:], tbl ) - -def cut_comment(s, tbl): - sta = enc( tbl.index('/*') ) - end = enc( tbl.index('*/') ) +def fidx(s, lst): + idxs = [ s.index(t) for t in lst if t in s ] + return min(idxs) if len(idxs) > 0 else None + +def idxs(s, sta, end): if sta not in s: - return '' + return (None, None) i = s.index(sta) + i2 = i +len(sta) + if end not in s[i2:]: + return ( i, len(s) ) + j = i2 + s[i2:].index(end) + j2 = j + len(end) + return (i, j2) + +def div_str_cut_comment(s, tbl): + targs = [ + [ 'str', '"', '"' ], + [ 'comment', '/*', '*/' ] + ] + targs = [ [ e[0] ] + [ enc( tbl.index(s1) ) for s1 in e[1:] ] for e in targs ] + i = fidx( s, zip(*targs)[1] ) + if i is None: + return [ [ None, s ] ] if i > 0: - return s[:i] + cut_comment( s[i:], tbl ) - sta_n = len(sta) - end_n = len(end) - j = sta_n + s[sta_n:].index(end) + end_n if end in s[sta_n:] else len(s) - return cut_comment( s[j:], tbl ) + return [ [ None, s[:i] ] ] + div_str_cut_comment( s[i:], tbl ) + (k, sta, end) = (None, None, None) + (k, sta, end) = next( ( e for e in targs if s.startswith( e[1] ) ), (k, sta, end) ) + (_, j) = idxs(s, sta, end) + r = [ [ 'str', decode( s[:j], tbl ) ] ] if k == 'str' else [] + return r + div_str_cut_comment( s[j:], tbl ) def es_split(s): tbl = [ '/*', '*/', '(', ')', ';', ',', '*', '"', ' ', '\t', '\n' ] s = encode(s, tbl) print('encode:\n{}\n'.format(s)) - s = cut_comment(s, tbl) - print('cut_comment:\n{}\n'.format(s)) - - lst = div_str(s, tbl) + lst = div_str_cut_comment(s, tbl) print('div_str:\n{}\n'.format(lst)) f = lambda e: decode(e[1], tbl, add_spc=True).split() if e[0] is None else [ e[1] ]