diff -urN v3/ezhtml.py v4/ezhtml.py
--- v3/ezhtml.py 2018-09-17 20:15:22.000000000 +0900
+++ v4/ezhtml.py 2018-09-17 23:46:44.000000000 +0900
@@ -28,9 +28,107 @@
return s
return cv_amp(o)
+def get_tag(s):
+    """Split ``s`` at the next tag.
+
+    A tag starts at a '<' that is followed, optionally after a '/',
+    by an alphabetic character.  Any other '<' is kept as literal text.
+
+    Returns a tuple ``(text, tag, rest)``:
+
+    * ``text`` -- everything that precedes the tag.
+    * ``tag``  -- the tag content split on single spaces; a closing tag
+      has '/' as its first element (``['/', 'p']``).  Empty when ``s``
+      holds no tag.
+    * ``rest`` -- what follows the tag's '>'.  Empty when no tag was
+      found; in that case the whole input is returned as ``text``.
+    """
+    text = ''
+    tag = []
+
+    while '<' in s:
+        before, _, s = s.partition('<')
+        text += before
+        tag = ['<']
+        if s[:1] == '/':
+            tag.append('/')
+            s = s[1:]
+        if s[:1].isalpha():
+            if '>' not in s:
+                # err() is defined elsewhere in the file; presumably it
+                # reports the problem and aborts -- confirm.
+                err("not found '>'", 'tag={}'.format(tag))
+            end = s.index('>')
+            # Drop the leading '<' marker, keep the optional '/'.
+            tag = tag[1:] + s[:end].split(' ')
+            s = s[end+1:]
+            break
+        # Not a real tag: put the consumed characters back into the text.
+        text += ''.join(tag)
+        tag = []
+
+    if not tag:
+        text, s = text + s, ''
+
+    return (text, tag, s)
+
+def start_idx(stk, s):
+    """Return the index of the innermost open tag named ``s`` in ``stk``.
+
+    ``stk`` is searched from the end towards the start.  Only list
+    entries are considered; an entry matches when its first element
+    equals ``s``.  Returns -1 when nothing matches.
+    """
+    for i in range(len(stk) - 1, -1, -1):
+        e = stk[i]
+        # isinstance also accepts list subclasses; the emptiness check
+        # keeps an empty list entry from raising IndexError.
+        if isinstance(e, list) and e and e[0] == s:
+            return i
+    return -1
+
+def solo_tag(e):
+    """Convert a tag list into a one-entry dict whose value is '/'.
+
+    The key is the tag's elements joined with single spaces.  For a
+    closing-tag list (first element '/') the key is '/' immediately
+    followed by the joined remainder, e.g. ``['/', 'p']`` gives
+    ``{'/p': '/'}``.  Anything that is not a list is returned unchanged.
+    """
+    # isinstance (rather than an exact type comparison) also converts
+    # list subclasses instead of passing them through untouched.
+    if not isinstance(e, list):
+        return e
+    if e[0] == '/':
+        prefix, parts = '/', e[1:]
+    else:
+        prefix, parts = '', e
+    return {prefix + ' '.join(parts): '/'}
+
+def solo_tags(lst):
+    """Apply solo_tag() to every element of ``lst``; return a new list."""
+    return [solo_tag(item) for item in lst]
+
+def untabify(s, n=8):
+    """Replace each tab in ``s`` with spaces up to the next tab stop.
+
+    Tab stops are every ``n`` columns.  The column count restarts
+    after each '\\n', because the text is processed line by line.
+    """
+    def expand(line):
+        out = ''
+        for ch in line:
+            # len(out) is the current column of the expanded line.
+            out += ' ' * (n - len(out) % n) if ch == '\t' else ch
+        return out
+
+    return '\n'.join(expand(line) for line in s.split('\n'))
+
+def strip_lst(lst, pre=False):
+    """Normalize the whitespace of the strings in ``lst``.
+
+    Dict elements are passed through untouched.  String elements are
+    cleaned according to ``pre``:
+
+    * ``pre`` false -- newlines become spaces and the result is stripped.
+    * ``pre`` true  -- spaces directly before a newline are removed and
+      tabs are expanded with untabify(); line breaks are kept.
+
+    Strings that end up empty are dropped.  Returns a new list.
+    """
+    def clean(e):
+        # isinstance also lets dict subclasses through; an exact type
+        # comparison would send them on to str.replace and fail.
+        if isinstance(e, dict):
+            return e
+        if pre:
+            # Repeat until no space is left in front of any newline.
+            while ' \n' in e:
+                e = e.replace(' \n', '\n')
+            return untabify(e)
+        return e.replace('\n', ' ').strip()
+
+    cleaned = (clean(e) for e in lst)
+    return [e for e in cleaned if e != '']
+
+def close_tag(lst):
+    """Fold an open tag and its collected content into a one-entry dict.
+
+    ``lst[0]`` is the open tag (a list); the remaining elements are its
+    content.  Tag lists left in the content are converted with
+    solo_tags(), then the content is cleaned with strip_lst(), using
+    its ``pre`` mode when the tag name is 'pre' (case-insensitive).
+
+    The key is the tag's elements joined with spaces.  The value is ''
+    for no content, the item itself for exactly one item, otherwise
+    the list of items.
+    """
+    tag, body = lst[0], lst[1:]
+    is_pre = tag[0].lower() == 'pre'
+    items = strip_lst(solo_tags(body), is_pre)
+    if not items:
+        value = ''
+    elif len(items) == 1:
+        value = items[0]
+    else:
+        value = items
+    return {' '.join(tag): value}
+
def html_load(s):
- # ...
- return { 'html': { 'body': { 'p': 'to be continued' } } }
+    """Parse the HTML text ``s`` into nested dicts, lists and strings.
+
+    The input is consumed tag by tag with get_tag() while a stack is
+    maintained.  The stack holds text (str), tags that are still open
+    (list) and finished elements (dict).
+
+    * Text is passed through cv_amp(p, 'dec') before being pushed
+      (presumably decoding '&' entities -- confirm against cv_amp).
+    * An opening tag is pushed as-is.
+    * A closing tag folds everything from its innermost matching
+      opening tag onwards into one dict via close_tag().  Without a
+      matching opening tag it is recorded via solo_tag().
+
+    Tags still open at the end are converted with solo_tags(), the
+    result is cleaned with strip_lst(), and a list holding a single
+    item is replaced by that item.
+    """
+    stk = []
+    while True:
+        (p, tag, s) = get_tag(s)
+        # Push the text before testing for a tag, so that text after
+        # the last tag (or input without any tag) is not discarded.
+        if p:
+            stk.append(cv_amp(p, 'dec'))
+        if not tag:
+            break
+        if tag[0] != '/':
+            stk.append(tag)
+            continue
+        i = start_idx(stk, tag[1])
+        if i >= 0:
+            stk = stk[:i] + [close_tag(stk[i:])]
+        else:
+            stk.append(solo_tag(tag))
+
+    o = strip_lst(solo_tags(stk))
+    while isinstance(o, list) and len(o) == 1:
+        o = o[0]
+    return o
if __name__ == "__main__":
b = nkf.get_stdin()