- 论坛徽章:
- 0
|
可以用下面这个脚本代替 lynx --dump,把 HTML 文件转换成纯文本。
#! /usr/bin/env python
import htmllib
def html2text():
    """Render the HTML file named on the command line as plain text.

    Expects exactly one command-line argument: the path of the HTML file.
    The file content is fed to ``htmllib.HTMLParser``, whose formatter
    sends the text to a ``formatter.DumbWriter`` (standard output unless
    a file is given to the writer).

    Exits through ``sys.exit``:
      * with a usage message when the argument count is wrong;
      * with status 1, after printing ``<path> : <error>``, when the file
        cannot be opened or read.
    """
    import sys
    import formatter

    if len(sys.argv) != 2:
        sys.exit("use html2text.py *.html")

    path = sys.argv[1]
    try:
        # The context manager closes the file even if read() fails.
        with open(path, 'r') as source:
            data = source.read()
    except IOError as err:
        # Single-argument print() parses under both Python 2 and 3 and
        # produces the same "<path> : <error>" line as the old statement.
        print("%s : %s" % (path, err))
        sys.exit(1)

    # DumbWriter word-wraps its output to 72 columns by default;
    # htmllib.HTMLParser needs a formatter to receive the parsed text.
    text_formatter = formatter.AbstractFormatter(formatter.DumbWriter())
    parser = htmllib.HTMLParser(text_formatter)
    parser.feed(data)
    parser.close()
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not on import.
    html2text()
在 .mailcap 中加入下面这一行:
text/html; html2text.py %s ; copiousoutput
本文来自ChinaUnix博客,如果查看原文请点:http://blog.chinaunix.net/u/4774/showart_239170.html |
|