- 论坛徽章:
- 0
|
Written by wolfg
问题:申请了一个免费Wiki空间,有备份功能,但保存下来的zip文件里的中文文件名和内容都是乱码,如图
![]()
![]()
分析:解压缩后,用UltraEdit打开乱码的文件,发现转换成UTF-8后可以正常显示中文
解决思路:写一个Python脚本,解压缩,把UTF-8编码的文件名和文件内容转成GBK编码。
Google 后在 ASPN 上找到了一个解压缩 zip 文件的例子(http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252508)。为了在下面的 convert.py 脚本里使用,改了一下 extract 方法,让它返回 zip 文件的文件列表。
convert.py先把压缩文件解开,然后读取每个文件,用utf-8编码读取,然后用gbk写入另一个文件
- """ unzip.py
- Version: 1.1
- Extract a zipfile to the directory provided
- It first creates the directory structure to house the files
- then it extracts the files to it.
- Sample usage:
- command line
- unzip.py -p 10 -z c:\testfile.zip -o c:\testoutput
- python class
- import unzip
- un = unzip.unzip()
- un.extract(r'c:\testfile.zip', r'c:\testoutput')
-
- By Doug Tolton
- """
- import sys
- import zipfile
- import os
- import os.path
- import getopt
class unzip:
    """Extract a zip archive into a directory, optionally reporting progress.

    Attributes:
        verbose: when true, print the name of every member as it is
            extracted (takes priority over ``percent``).
        percent: when not verbose, print a "<n>% complete" line roughly
            every ``percent`` percent of the members.
    """

    def __init__(self, verbose = False, percent = 10):
        self.verbose = verbose
        self.percent = percent

    def extract(self, file, dir):
        """Extract every member of the zip archive *file* into *dir*.

        *dir* is created if it does not exist, and so are any parent
        directories that members need, whether or not the archive has
        explicit directory entries for them.

        Returns:
            The archive's member names, in archive order.

        Raises:
            ValueError: if a member name would resolve outside *dir*.
                All names are checked before anything is written.
        """
        with zipfile.ZipFile(file) as zf:
            names = zf.namelist()

            # Validate every name up front so a hostile archive
            # ("../x", "/etc/x") is rejected before any file is created.
            for name in names:
                self._target(dir, name)

            # A bare drive spec such as "c:" always exists; do not create it.
            if not dir.endswith(':') and not os.path.exists(dir):
                os.makedirs(dir)

            # create directory structure to house files
            self._createstructure(file, dir)

            percent = self.percent
            # Floor division keeps the arithmetic integral on Python 2 and 3.
            # A percent outside 1..100 disables progress output instead of
            # raising ZeroDivisionError.
            if 0 < percent <= 100:
                divisions = 100 // percent
            else:
                divisions = 0
            if divisions:
                perc = len(names) // divisions
            else:
                perc = 0

            # extract files to directory structure
            for i, name in enumerate(names):
                if self.verbose:
                    print("Extracting %s" % name)
                elif perc > 0 and i > 0 and (i % perc) == 0:
                    complete = (i // perc) * percent
                    print("%s%% complete" % complete)

                if name.endswith('/'):
                    continue  # directory entry: already created above

                target = self._target(dir, name)
                # Archives frequently omit directory entries, so make sure
                # the parent exists before opening the output file.
                parent = os.path.dirname(target)
                if not os.path.isdir(parent):
                    os.makedirs(parent)
                with open(target, 'wb') as outfile:
                    outfile.write(zf.read(name))

        return names

    def _target(self, basedir, name):
        """Return the absolute output path for member *name* under *basedir*.

        Raises ValueError if the resolved path is not inside *basedir*.
        """
        root = os.path.abspath(basedir)
        path = os.path.abspath(os.path.join(root, name))
        prefix = root.rstrip(os.sep) + os.sep
        if path != root and not path.startswith(prefix):
            raise ValueError("unsafe member path in zip archive: %r" % (name,))
        return path

    def _createstructure(self, file, dir):
        """Create under *dir* every directory listed explicitly in *file*."""
        self._makedirs(self._listdirs(file), dir)

    def _makedirs(self, directories, basedir):
        """ Create any directories that don't currently exist """
        for dir in directories:
            curdir = self._target(basedir, dir)
            if not os.path.exists(curdir):
                # makedirs, not mkdir: a nested entry may appear without
                # its parent being listed in the archive.
                os.makedirs(curdir)

    def _listdirs(self, file):
        """ Grabs all the directories in the zip structure
        This is necessary to create the structure before trying
        to extract the file to it. """
        with zipfile.ZipFile(file) as zf:
            return sorted(name for name in zf.namelist() if name.endswith('/'))
def usage():
    """Print the command-line help text for unzip.py to standard output."""
    # The stray ';' after each '>' in the extracted listing was HTML-entity
    # residue; the placeholders are plain <zipfile> and <targetdir>.
    print("""usage: unzip.py -z <zipfile> -o <targetdir>
    <zipfile> is the source zipfile to extract
    <targetdir> is the target destination
    -z zipfile to extract
    -o target location
    -p sets the percentage notification
    -v sets the extraction to verbose (overrides -p)
    long options also work:
    --verbose
    --percent=10
    --zipfile=<zipfile>
    --outdir=<targetdir>""")
-
def main():
    """Command-line entry point: parse options, then extract the archive.

    Options are read from ``sys.argv``. Both a source zip file (-z) and a
    target directory (-o) are required.

    Exits with status 2, after printing the usage text, when the options
    cannot be parsed, when -p is not an integer in 1..100, or when a
    required option is missing. -h/--help prints the usage text and exits
    with status 0.
    """
    shortargs = 'vhp:z:o:'
    longargs = ['verbose', 'help', 'percent=', 'zipfile=', 'outdir=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], shortargs, longargs)
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    unzipper = unzip()
    zipsource = ""
    zipdest = ""
    for o, a in opts:
        if o in ("-v", "--verbose"):
            unzipper.verbose = True
        elif o in ("-p", "--percent"):
            # Verbose output overrides percentage notification, so the
            # value is only consumed when verbose is not already set.
            if not unzipper.verbose:
                try:
                    percent = int(a)
                except ValueError:
                    percent = 0
                # Report a bad value as a usage error instead of letting
                # int() or the progress arithmetic raise a traceback.
                if not 0 < percent <= 100:
                    usage()
                    sys.exit(2)
                unzipper.percent = percent
        elif o in ("-z", "--zipfile"):
            zipsource = a
        elif o in ("-o", "--outdir"):
            zipdest = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()

    if zipsource == "" or zipdest == "":
        usage()
        # Missing required options is an error, same status as a parse error.
        sys.exit(2)

    unzipper.extract(zipsource, zipdest)


if __name__ == '__main__':
    main()
复制代码
- # convert filename & content of utf-8 to gbk
- import unzip
- import codecs
- import sys
- import os
- import os.path
- import zipfile
def usage():
    """Print the command-line help text for convert.py to standard output."""
    print("""usage: convert.py backupfile
    backupfile is the file saved from wiki""")
def main():
    """Unpack a wiki backup zip and re-encode its files from UTF-8 to GBK.

    Takes exactly one command-line argument, the backup zip file. The
    archive is extracted into a directory next to it, named after the
    archive without its extension. For every extracted file, the content
    is read as UTF-8 and written as GBK to "<name>.txt"; the extracted
    original is removed only after its conversion succeeded.

    Prints one "[n] <name> ... done" or "... failed" line per file. Prints
    a message and exits when the argument is missing, is not a file, or is
    not a zip archive.
    """
    if len(sys.argv) != 2:
        usage()
        sys.exit()

    backupfile = sys.argv[1]
    if not os.path.isfile(backupfile):
        print("No such file or not a valid file")
        sys.exit()

    if not zipfile.is_zipfile(backupfile):
        print("Invalid zip file")
        sys.exit()

    # Strip the extension from the ABSOLUTE path. Joining the archive's
    # directory with a relative path such as "backups/wiki.zip" would
    # duplicate the "backups" component.
    outputdir = os.path.splitext(os.path.abspath(backupfile))[0]

    un = unzip.unzip()
    namelist = un.extract(backupfile, outputdir)

    # Directory entries carry no content to convert.
    files = [name for name in namelist if not name.endswith('/')]

    for i, name in enumerate(files, 1):
        sys.stdout.write('[%d] ' % i)
        try:
            # Python 2: member names are byte strings holding UTF-8.
            filename = name.decode('utf-8').encode('gbk')
        except (AttributeError, UnicodeError):
            # Not valid UTF-8 / not representable in GBK, or the name is
            # already a text string (no .decode): keep it unchanged.
            filename = name
        sys.stdout.write('%s ...' % filename)
        filename = filename + '.txt'

        source = os.path.join(outputdir, name)
        try:
            with codecs.open(source, "r", "utf-8") as infile:
                content = infile.read()
            # Encode before opening the output so a character that GBK
            # cannot represent does not leave a truncated file behind.
            encoded = content.encode("gbk")
            with open(os.path.join(outputdir, filename), "wb") as outfile:
                outfile.write(encoded)
        except (IOError, OSError, UnicodeError):
            # Keep the extracted original: it is the only copy of the data.
            print(' failed')
            continue
        print(' done')

        try:
            os.remove(source)
        except OSError:
            pass  # best effort: a leftover original is harmless


if __name__ == '__main__':
    main()
-
复制代码 |
|