- 论坛徽章:
- 0
|
楼上的说的对,写程序是要靠自己不断的实践的
下面是我昨晚写的,输出结果为
AA21_1 重庆市渝中区嘉陵桥西村83号重庆市邮政管理局<400001> [(u'', u'\u7701'), (u'\u91cd\u5e86\u5e02', u'\u5e02'), (u'\u6e1d\u4e2d\u533a', u'\u533a/\u53bf'), (u'', u'\u4e61/\u9547'), (u'\u5609\u9675\u6865\u897f\u6751', u'\u5730\u65b9'), (u'83\u53f7', u'\u53f7'), (u'\u91cd\u5e86\u5e02\u90ae\u653f\u7ba1\u7406\u5c40', u'\u673a\u6784')]
为什么前面的可以,而后面的却是这样的输出码,请帮忙看看程序哪里出了问题
# -*- coding: utf8 -*-
import re, sys
adrIdRgx = re.compile("^.(?:\w*_)+\d+")
f = open('ceshi.csv')
isPrevLineEmpty = True
isPrevLineHeader = False
for line in f.readlines():
line = line.decode('utf8').strip()
if isPrevLineEmpty:
if not adrIdRgx.match(line):
raise InvalidAceFormatCsvException(filePath, lineNum, \
"Header Line not following Empty Line.")
adrHeader = line.split(",")
#print adrHeader[0]
#提取Id号,AA21_1
adrId = adrIdRgx.search(adrHeader[0]).group(0)
#print adrId
#将省/市等类放入一个数组
tknClss = []
for tknCls in adrHeader[1:]:
tknCls = tknCls.strip()
tknClss.append(tknCls)
#for tknCls in tknClss:
#print tknCls
isPrevLineHeader = True
isPrevLineEmpty = False
elif isPrevLineHeader:
adr = line.split(",")
#提取整个地址:重庆市渝中区较场口<400010>
fullAdr = adr[0].strip()
#print fullAdr
#将重庆市/渝中区等元素放入一个数组
tkns = []
for tkn in adr[1:]:
tkn = tkn.strip()
tkns.append(tkn)
#for tkn in tkns:
#if len(tkn) == 1:
#print adrId,tkn
if len(tkns) != len(tknClss):
print "warning"
else:
#将对应的类与元素以元组形式放进一个数组,(省,)(市,重庆市)
nonEmptyTkns = []
for tknCls, tkn in zip(tknClss, tkns):
nonEmptyTkns.append((tkn, tknCls))
#打印输出一个地址记录
print adrId, fullAdr,nonEmptyTkns
#print tkn
isPrevLineHeader = False
isPrevLineEmpty = False
elif line == "":
isPrevLineHeader = False
isPrevLineEmpty = True
else:
print 1 |
|