- 论坛徽章:
- 3
|
学生党:飘过~试试python,注意原始编码和目标编码一定要写对了...不然会出问题的
- #!/usr/bin/env python
- # -*- coding: gbk -*-
- """
- @author:白头发
- @copyright: just for fun!
- """
- import os
- import glob
class FEncoding(object):
    """Re-encode files in place from one character encoding to another.

    Configure the source encoding, the target encoding and (optionally) a
    file extension filter through the setters, then feed directory listings
    to ``processdirs`` or single paths to ``file222file``.
    """

    def __init__(self):
        # '.*' is the wildcard value: every file name is accepted.
        self.extension = '.*'
        self.srcsencod = None
        self.dessencod = None

    def setsrcencod(self, srcsencod):
        """Store the encoding the files are currently in and echo it."""
        self.srcsencod = srcsencod
        print(self.srcsencod)

    def setdesencod(self, dessencod):
        """Store the encoding the files should be converted to and echo it."""
        self.dessencod = dessencod
        print(self.dessencod)

    def setentenson(self, extension):
        """Set the extension filter, adding the leading dot when missing."""
        if not extension.startswith('.'):
            self.extension = ''.join(['.', extension])
        else:
            self.extension = extension

    def convertcode(self, content):
        """Return the bytes of ``content`` re-encoded from source to target.

        Raises whatever ``decode``/``encode`` raise, e.g. ``UnicodeError``
        for undecodable data or ``LookupError`` for an unknown codec name.
        """
        return content.decode(self.srcsencod).encode(self.dessencod)

    def file222file(self, f_name):
        """Convert the file at ``f_name`` in place.

        Failures are reported on stdout and never propagate, so one bad file
        does not stop a batch run.
        """
        try:
            with open(f_name, 'rb') as src:
                raw = src.read()
            print(raw)
            # Convert BEFORE reopening for writing: opening with 'wb'
            # truncates the file, so a conversion error after that point
            # would destroy the original data.
            converted = self.convertcode(raw)
            with open(f_name, 'wb') as dst:
                dst.write(converted)
        except (EnvironmentError, UnicodeError, LookupError, TypeError) as e:
            # TypeError covers encodings that were never set (still None).
            print(e)
        else:
            print('{}-转码成功'.format(f_name))

    def processdirs(self, root, dirs, files):
        """Convert every matching file of one directory listing.

        The signature matches an ``os.path.walk`` visitor: ``root`` is the
        opaque user argument (unused), ``dirs`` is the directory path and
        ``files`` the names found inside it.
        """
        for x in files:
            path = os.path.join(dirs, x)
            # os.path.walk lists sub-directories alongside files; skip them
            # instead of letting open() fail on each one.
            if not os.path.isfile(path):
                continue
            if self.extension == '.*' or x.endswith(self.extension):
                self.file222file(path)
if __name__ == '__main__':
    # Interactive entry point: ask for the extension filter, both encodings
    # and the directory to process, then convert every matching file below it.
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3 renamed raw_input to input
    trans = FEncoding()
    trans.setentenson(ask('文件类型:'))
    trans.setsrcencod(ask('原始编码:'))
    trans.setdesencod(ask('目标编码:'))
    topdr = ask('指定目录:')
    # os.path.walk is deprecated and gone in Python 3; os.walk visits the
    # same tree top-down and separates file names from directory names, so
    # only real files are handed to the converter.
    for dirpath, _dirnames, filenames in os.walk(topdr):
        trans.processdirs(None, dirpath, filenames)
测试:
$ ./www.yhsafe.net.py
文件类型:.csv
原始编码:GB2312
GB2312
目标编码:UTF-8
UTF-8
指定目录:./
1,2,3,小满
./data_in_chinese.csv-转码成功
$ file data_in_chinese.csv
data_in_chinese.csv: UTF-8 Unicode text, with no line terminators |
|