- 论坛徽章:
- 0
|
我刚刚用python写了一段操作excel的脚本,目的是把一个excel文件按照某一列中的字段拆分成多个文件,例如按照城市或者省份等,但是发现处理一个1700行的文件拆分成40多个文件时要运行30分钟左右,性能太慢,请高手帮忙看看怎么才能优化性能,谢谢。
新手写的脚本,请不要见笑,如下:
'''
Created on 2009-9-25
@author: vidon
'''
#_*_ coding:UTF-8 _*_
import os, sys, time
import win32com.client
xls=win32com.client.Dispatch("Excel.Application")
xls.Visible=False
source_path = input('请输入要操作的文件名路径与名称(例如:D:\ABC\BOOK.XLS):').replace('\r','')
target_path = input('请输入存放数据处理结果的目录(例如:D:\BACK\):').replace('\r','')
x=int(input("请输入表头的行数:").replace('\r',''))
y=int(input("请输入作为拆分数据的依据所在列的号码:").replace('\r',''))
if not os.path.exists(target_path):
os.mkdir(target_path)
print("已经创建指定的目录,准备创建文件,请稍等······")
else:
print("准备创建文件,请稍等······")
#读取原始文件
book=xls.Workbooks.Open(source_path)
sheet=book.ActiveSheet
length = sheet.UsedRange.Rows.Count
y_length = sheet.UsedRange.Columns.Count
keyArray = []
for i in range((x+1),length):
keyArray.append(sheet.Cells(i,y).Value)
#根据指定的列中的字段创建新文件
ll = len(keyArray)
for j in range(1,ll):
filename = target_path + str(keyArray[j]) + ".xls"
if os.path.exists (filename):
continue
else :
sheet.Range(sheet.Cells(1,1),sheet.Cells(x,y_length)).select
sheet.Range(sheet.Cells(1,1),sheet.Cells(x,y_length)).copy
newbook = xls.Workbooks.Add()
newsheet=newbook.ActiveSheet
newsheet.Range(newsheet.Cells(1,1),newsheet.Cells(x,y_length)).select
newsheet.Paste()
newbook.SaveAs(filename)
print ("成功创建文件:",filename)
newbook.close
#拷贝符合条件的记录到指定的文件
print("正在处理数据,请稍等······")
for i in range((x+1),length) :
address = sheet.Cells(i,y).value
address_xls = xls.Workbooks.Open(target_path + address +".xls")
address_sheet = xls.ActiveSheet
sheet = book.ActiveSheet
sheet.Range(sheet.Cells(i,1),sheet.Cells(i,y_length)).copy
address_length = address_sheet.UsedRange.Rows.Count
address_sheet.Range(address_sheet.Cells(address_length+1,1),address_sheet.Cells(address_length+1,y_length)).select
address_sheet.Paste()
address_xls.Save()
address_xls.close
book.close
xls.quit
print ("恭喜!数据处理完毕!")
|
|
|