#!/usr/bin/env python
"""Helpers for guessing and converting Japanese text encodings via ``nkf``.

All ``b`` / ``u8`` arguments are byte strings; ``s`` arguments are text
strings.  The module is written to run on both Python 2 and Python 3 (via
``six``), so it deliberately avoids Python-3-only syntax.
"""

import subprocess as sp
import sys

import six

import cmd_ut


def enc(s):
    """Encode text ``s`` to UTF-8 bytes."""
    # On Python 3, str.encode() defaults to UTF-8.
    return s.encode('utf-8') if six.PY2 else s.encode()


def dec(b):
    """Decode UTF-8 bytes ``b`` to text."""
    # `unicode` only exists on Python 2; that branch is never evaluated on 3.
    return unicode(b, 'utf-8') if six.PY2 else b.decode()


def get_stdin():
    """Read all of standard input and return it as bytes."""
    # Python 3 exposes the raw byte stream as sys.stdin.buffer.
    fi = sys.stdin if six.PY2 else sys.stdin.buffer
    return fi.read()


def put_stdout(b):
    """Write the bytes ``b`` to standard output."""
    fo = sys.stdout if six.PY2 else sys.stdout.buffer
    fo.write(b)


# Encoding name (as returned by guess()) -> nkf output option.
# ASCII maps to the empty option, which cvt() treats as "no conversion".
opt_dic = {
    'ISO-2022-JP': '-j',
    'EUC-JP': '-e',
    'Shift_JIS': '-s',
    'UTF-8': '-u',
    'ASCII': '',
}

# One-element cache for the result of `which nkf`; empty until first lookup.
which_nkf = []


def get_which_nkf():
    """Return the cached result of ``cmd_ut.call('which nkf')``.

    The command is only run on the first call; callers use the result as a
    truthy/falsy "is nkf available" flag.
    """
    if not which_nkf:
        which_nkf.append(cmd_ut.call('which nkf'))
    return which_nkf[0]


def guess(b, style_opt=False):
    """Guess the encoding of the bytes ``b``.

    When nkf is available the guess is delegated to ``nkf -g``.  Otherwise a
    crude fallback is used: any byte >= 0x80 means 'UTF-8', else 'ASCII'.

    Args:
        b: the bytes to inspect.
        style_opt: when true, return the nkf option from ``opt_dic``
            (e.g. '-s'; unknown names map to '-u') instead of the name.

    Returns:
        The encoding name, or the matching nkf option if ``style_opt``.
    """
    if get_which_nkf():
        # presumably call_comm feeds `b` to the command's stdin and returns
        # its stdout as bytes -- verify against cmd_ut.
        name = dec(cmd_ut.call_comm(enc('nkf -g'), b)).strip()
    elif any(byte >= 0x80 for byte in bytearray(b)):
        # bytearray yields ints on both Python 2 and 3; iterating `bytes`
        # directly and comparing with b'\x80' raises TypeError on Python 3.
        # Rough guess only: any high byte is assumed to be UTF-8.
        name = 'UTF-8'
    else:
        name = 'ASCII'
    return opt_dic.get(name, '-u') if style_opt else name


def cvt(b, opt, do_guess=False):
    """Convert the bytes ``b`` with ``nkf <opt>``.

    Args:
        b: the bytes to convert.
        opt: an nkf option from ``opt_dic`` (e.g. '-u') or an encoding name
            (e.g. 'UTF-8'); unknown names fall back to '-u'.
        do_guess: when true, first guess the encoding of ``b`` and skip the
            conversion if it is ASCII or already the target encoding.

    Returns:
        The converted bytes, or ``b`` unchanged when no conversion is needed.
    """
    if opt not in opt_dic.values():
        opt = opt_dic.get(opt, '-u')
    if opt == '':
        # Empty option (ASCII target): nothing to convert.
        return b
    if do_guess:
        from_opt = guess(b, True)
        if from_opt == '' or from_opt == opt:
            return b
    return cmd_ut.call_comm(enc('nkf ' + opt), b)


def to_utf8(b):
    """Convert ``b`` to UTF-8; return ``(utf8_bytes, original_nkf_option)``."""
    opt = guess(b, True)
    u8 = b if opt in ('-u', '') else cvt(b, '-u')
    return (u8, opt)


def utf8_to(u8, opt):
    """Convert UTF-8 bytes ``u8`` to the encoding selected by ``opt``."""
    return u8 if opt in ('-u', '') else cvt(u8, opt)


def to_str(b):
    """Decode ``b`` to text; return ``(text, original_nkf_option)``."""
    u8, opt = to_utf8(b)
    return (dec(u8), opt)


def str_to(s, opt):
    """Encode text ``s`` to bytes in the encoding selected by ``opt``."""
    return utf8_to(enc(s), opt)


def str_width(s):
    """Return the display width of ``s``: 1 per ASCII char, 2 per other."""
    return sum(1 if ord(c) < 0x80 else 2 for c in s)


if __name__ == "__main__":
    # Filter skeleton: stdin -> text -> (processing goes here) -> stdout,
    # restoring the encoding that was guessed for the input.
    b = get_stdin()
    opt = guess(b)
    u8 = cvt(b, '-u')
    s = dec(u8)
    # ...
    u8 = enc(s)
    b = cvt(u8, opt)
    put_stdout(b)
# EOF