- 论坛徽章:
- 54
|
本帖最后由 haooooaaa 于 2015-07-03 21:53 编辑
回复 10# steveq1
这个测试OK
- #!/usr/bin/env python
- #-*- coding:utf-8 -*-
- import base64
- import re
- filename = "3.txt"
def _b64(text):
    """Return *text* (encoded as UTF-8) as a single-line Base64 ASCII string."""
    # b64encode never inserts line breaks, unlike the removed
    # base64.encodestring which wrapped its output every 76 characters.
    return base64.b64encode(text.encode("utf-8")).decode("ascii")


def splitR(s):
    """Format one split question line as ``<question></div>\\t<options>``.

    Args:
        s: Sequence of strings. ``s[0]`` is the question markup; every
            following element is one answer option's markup.

    Returns:
        A string made of the cleaned question text, ``</div>``, a tab, and
        a bracketed list of ``{"option" : "<base64>"}`` entries, one per
        option (a single empty entry when there are no options).

    Raises:
        IndexError: If ``s`` is empty, or if a piece containing ``img src=``
            has no double-quoted value to extract.
    """
    # Everything from the first "<table name=" onwards is not question text.
    head = s[0].split("<table name=")[0]
    if "img src=" in head:
        # Keep the first double-quoted value (the image source) after the
        # tag-stripped question text.
        img = head.split('"')[1]
        question = "<div>" + re.sub(r"<[^>]*>", "", head) + " " + img
    elif "</td>" in head:
        # Table-cell markup: replace each tag with a space so that cell
        # contents stay separated.
        question = "<div>" + re.sub(r"<[^>]*>", " ", head) + " "
    else:
        question = head

    options = []
    for piece in s[1:]:
        if "img src=" in piece:
            raw = piece.split('"')[1]
        else:
            raw = piece.split("</td>")[0]
        options.append(_b64(raw))

    return '%s</div>\t[{"option" : "%s' % (
        question,
        '"},{"option" : "'.join(options) + '"}]',
    )
def convf(filename):
    """Convert every question line of *filename* with ``splitR``.

    Lines that start with ``question`` and lines that are blank are skipped.
    Each remaining line has its spaces removed, is split on the option
    markers, and is passed to ``splitR``.

    Args:
        filename: Path of the text file to read (decoded as UTF-8).

    Returns:
        A list with one formatted string per processed line, in file order.
    """
    results = []
    with open(filename, encoding="utf-8") as f:
        for line in f:
            if line.startswith("question") or not line.strip():
                continue
            # NOTE(review): the pattern removes every space; it may originally
            # have targeted a non-breaking space / "&nbsp;" -- confirm.
            line = re.sub(" ", "", line)
            # First try "A"-"D" followed by ANY character as the option
            # marker; if that yields three pieces or fewer, fall back to a
            # literal dot after the letter.
            parts = re.split("[ABCD].", line)
            if len(parts) <= 3:
                parts = re.split(r"[ABCD]\.", line)
            results.append(splitR(parts))
    return results
if __name__ == "__main__":
    # Write one converted record per line to base.txt. The encoding is
    # given explicitly so non-ASCII question text does not depend on the
    # platform's default locale encoding.
    with open("base.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(convf(filename)))
复制代码 |
|