#!/usr/bin/env python
"""Look up elements of an HTML document by tag path.

The document (local file or http/https URL) is piped through external
helper commands resolved via ``cmd_ut.cmd_py``; their output is parsed as
YAML and the resulting nested dict/list structure is searched by tag name.
Presumably the ``ezhtml`` helper converts HTML into that YAML form --
confirm against the helper itself.
"""

import os

try:
    from shlex import quote as _quote  # Python 3.3+
except ImportError:  # Python 2 fallback
    from pipes import quote as _quote

import yaml

import cmd_ut
import nkf
import arg
import dbg  # not referenced in the code visible here; kept for compatibility

# Directories tried, in order, for a relative path that does not exist
# relative to the current working directory.
search_dirs = [os.path.dirname(__file__)]

# Prefixes that mark *path* as a URL to download rather than a file to read.
_URL_PREFIXES = ('http://', 'https://')


def get_cmd(path):
    """Return a shell command that writes the document at *path* to stdout.

    URLs starting with ``http://`` or ``https://`` are fetched with
    ``wget -q -O-``; anything else is read with ``cat``.  A relative path
    that does not exist is looked up in ``search_dirs`` and replaced by the
    first existing candidate; if none exists the path is used unchanged.

    The path is shell-quoted because the result is embedded in a shell
    pipeline: without quoting, URLs containing ``&``/``?``/``;`` or paths
    containing spaces would be split or executed by the shell.  As a
    consequence the shell no longer performs tilde or glob expansion on
    *path*.
    """
    if path.startswith(_URL_PREFIXES):
        cmd = 'wget -q -O-'
    else:
        cmd = 'cat'
        if not path.startswith('/') and not os.path.exists(path):
            for d in search_dirs:
                candidate = os.path.join(d, path)
                if os.path.exists(candidate):
                    path = candidate
                    break
    return cmd + ' ' + _quote(path)


def get_yaml(path, head_only=False):
    """Run the conversion pipeline for *path* and return its output text.

    The pipeline is ``get_cmd(path)``, optionally the ``from_to`` helper
    (when *head_only* is true), then the ``ezhtml`` helper with argument
    ``h``.  The raw output of ``cmd_ut.call`` is converted with
    ``nkf.to_str`` and the first element of its result is returned.
    """
    stages = [get_cmd(path)]
    if head_only:
        # NOTE(review): both delimiter arguments are empty strings here;
        # presumably they were meant to bracket the <head> section --
        # confirm against the from_to helper before changing.
        stages.append(cmd_ut.cmd_py('from_to') + ' "" ""')
    stages.append(cmd_ut.cmd_py('ezhtml') + ' h')
    cmd = ' | '.join(stages)

    raw = cmd_ut.call(cmd)
    (s, _opt) = nkf.to_str(raw)  # second element is not needed here
    return s


def get_obj(path, head_only=False):
    """Return the document at *path* as a parsed YAML object.

    When *head_only* is true the parsed object is wrapped as
    ``{'html': [obj]}`` so that tag paths starting with ``html`` still
    resolve against it.
    """
    s = get_yaml(path, head_only)
    # safe_load instead of yaml.load(s): the text may originate from a
    # remote URL, and yaml.load() without an explicit Loader is unsafe on
    # old PyYAML and raises TypeError on PyYAML >= 6.
    o = yaml.safe_load(s)
    if head_only:
        o = {'html': [o]}
    return o


def find_tag(o, tag):
    """Depth-first search of *o* for the first mapping key matching *tag*.

    Ex. tag 'body'

    A key matches when its first whitespace-separated word, lower-cased,
    equals *tag*.  Dicts and lists are searched recursively; any other
    value ends the search on that branch.

    Returns the value stored under the matching key, or ``None`` when no
    key matches.  A matching key whose value is ``None`` is therefore
    indistinguishable from "not found".
    """
    if isinstance(o, dict):
        # Mappings are expected to hold a single item, but empty and
        # multi-key mappings are handled too instead of raising/ignoring.
        for k, v in o.items():
            words = k.split() if hasattr(k, 'split') else ()
            if words and words[0].lower() == tag:
                return v
            found = find_tag(v, tag)
            if found is not None:
                return found
        return None
    if isinstance(o, list):
        for item in o:
            found = find_tag(item, tag)
            if found is not None:
                return found
        return None
    return None


def find_tag_path(o, tag_path):
    """Resolve a ``/``-separated *tag_path* against *o*.

    Ex. tag_path 'html/head/title'

    Each component is looked up with ``find_tag`` inside the value found
    for the previous component.  Returns the value of the last component,
    or ``None`` as soon as any component cannot be found.
    """
    v = None
    for tag in tag_path.split('/'):
        v = find_tag(o, tag)
        if v is None:
            break
        o = v
    return v


def get_tag_path(path, tag_path):
    """Load the document at *path* and return the value at *tag_path*.

    Paths starting with ``html/head`` load the document in head-only mode.
    Returns ``None`` when the tag path does not resolve.
    """
    head_only = tag_path.startswith('html/head')
    o = get_obj(path, head_only=head_only)
    return find_tag_path(o, tag_path)


def get_title(path, dv=''):
    """Return the value at ``html/head/title`` for *path*.

    *dv* is returned instead when the lookup yields a falsy value
    (not found, ``None`` or empty).
    """
    s = get_tag_path(path, 'html/head/title')
    return s if s else dv


if __name__ == "__main__":
    a = arg.new()
    # The argument passed to pop() is presumably the default used when no
    # command-line argument is given -- confirm against the arg module.
    path = a.pop('http://kondoh.html.xdomain.jp/index.html')
    s = get_title(path)
    print(nkf.enc(s))
# EOF