#!/usr/bin/env python
"""Helpers that pipe byte strings through the external ``nkf`` command.

``nkf`` is used both to guess the encoding of a byte string and to convert
it between encodings; small helpers bridge between bytes and text.  Run as
a script, the module reads stdin, round-trips it through UTF-8 text and
writes it back in the encoding that was guessed for the input.
"""

import sys
import subprocess as sp

try:
    import six
except ImportError:
    # six is only consulted for the PY2 flag, so its absence is not fatal.
    six = None

PY2 = six.PY2 if six is not None else sys.version_info[0] == 2


def enc(s):
    """Encode text *s* to UTF-8 bytes."""
    return s.encode('utf-8') if PY2 else s.encode()


def dec(b):
    """Decode UTF-8 bytes *b* to text."""
    return unicode(b, 'utf-8') if PY2 else b.decode()  # noqa: F821


def do_cmd(cmd, in_b):
    """Run *cmd* through the shell, feed it *in_b* on stdin, return stdout.

    *cmd* is interpreted by the shell (``shell=True``), so it must never be
    built from untrusted input.

    NOTE: the exit status is not checked; if the command fails, whatever it
    wrote to stdout (possibly nothing) is returned.
    """
    # Only a Python 2 ``unicode`` command needs encoding.  Python 3 takes the
    # str as-is (bytes commands are rejected on Windows), and re-encoding a
    # Python 2 byte string would trigger an implicit ASCII decode.
    if PY2 and isinstance(cmd, unicode):  # noqa: F821
        cmd = cmd.encode('utf-8')
    proc = sp.Popen(cmd, shell=True, stdin=sp.PIPE, stdout=sp.PIPE)
    return proc.communicate(in_b)[0]


def get_stdin():
    """Read all of stdin and return it as bytes."""
    fi = sys.stdin if PY2 else sys.stdin.buffer
    return fi.read()  # b


def put_stdout(b):
    """Write the bytes *b* to stdout."""
    fo = sys.stdout if PY2 else sys.stdout.buffer
    fo.write(b)


# Encoding name (as printed by ``nkf -g``) -> nkf output option.
# An empty option means "plain ASCII, nothing to convert".
opt_dic = {
    'ISO-2022-JP': '-j',
    'EUC-JP': '-e',
    'Shift_JIS': '-s',
    'UTF-8': '-u',
    'ASCII': '',
    # Vendor variants that ``nkf -g`` can report; without these entries they
    # fell through to the '-u' default and the data was treated as UTF-8.
    'CP50220': '-j',
    'CP50221': '-j',
    'EUCJP-MS': '-e',
    'CP51932': '-e',
    'CP932': '-s',
}


def guess(b, style_opt=False):
    """Guess the encoding of the bytes *b* with ``nkf -g``.

    Returns the stripped output of ``nkf -g``.  With ``style_opt=True`` the
    name is translated through ``opt_dic`` into an nkf option instead; names
    that are not in ``opt_dic`` yield ``'-u'``.
    """
    b = do_cmd('nkf -g', b)
    opt = dec(b).strip()
    return opt_dic.get(opt, '-u') if style_opt else opt


def cvt(b, opt, do_guess=False):
    """Convert the bytes *b* with ``nkf`` to the encoding selected by *opt*.

    *opt* is either an nkf option from ``opt_dic`` (e.g. ``'-s'``) or an
    encoding name that is a key of ``opt_dic``; anything else is treated as
    ``'-u'``.  An empty option returns *b* unchanged.  With
    ``do_guess=True`` the current encoding is guessed first and *b* is
    returned unchanged when it is ASCII or already in the target encoding.
    """
    if opt not in opt_dic.values():
        opt = opt_dic.get(opt, '-u')
    if opt == '':
        return b
    if do_guess:
        from_opt = guess(b, True)
        if from_opt == '' or from_opt == opt:
            return b
    return do_cmd('nkf ' + opt, b)


def to_utf8(b):
    """Return ``(utf8_bytes, opt)`` where *opt* is the guessed nkf option.

    *b* is returned as-is when it is guessed to be UTF-8 or ASCII.
    """
    opt = guess(b, True)
    u8 = b if opt in ('-u', '') else cvt(b, '-u')
    return (u8, opt)


def utf8_to(u8, opt):
    """Convert UTF-8 bytes *u8* to the encoding of nkf option *opt*.

    This is the inverse of ``to_utf8``; ``'-u'`` and ``''`` are no-ops.
    """
    return u8 if opt in ('-u', '') else cvt(u8, opt)


def to_str(b):
    """Return ``(text, opt)``: *b* decoded to text plus its guessed option."""
    (u8, opt) = to_utf8(b)
    return (dec(u8), opt)


def str_to(s, opt):
    """Encode text *s* into the encoding selected by nkf option *opt*."""
    return utf8_to(enc(s), opt)


def str_width(s):
    """Return a width for *s*: 1 per ASCII character, 2 per other character."""
    return sum(1 if ord(c) < 0x80 else 2 for c in s)


def main():
    """Read stdin, round-trip it through UTF-8 text, write it back."""
    b = get_stdin()
    opt = guess(b)

    u8 = cvt(b, '-u')
    s = dec(u8)

    # ...

    u8 = enc(s)
    b = cvt(u8, opt)
    put_stdout(b)


if __name__ == "__main__":
    main()

# EOF