- 论坛徽章:
- 95
|
回复 1# 321wangke321
参考这个:
data1.txt 对应文件 1;data2.txt 对应文件 3;keys.txt 对应文件 2。
#!/usr/bin/env python3
import sys
import os
import tempfile
def load_data1(filename):
    """Index a tab-separated file by gene id and by group number.

    Only lines with at least three tab-separated fields are used: the first
    field is taken as the number and the third field as the gene id. Shorter
    lines are ignored.

    Args:
        filename: Path of the tab-separated text file to read.

    Returns:
        A ``(number_of, lines_of)`` tuple. ``number_of`` maps each gene id to
        the number found on the last line carrying that gene id. ``lines_of``
        maps each number to a list of ``[geneid, line]`` pairs in file order,
        where ``line`` is the unmodified line including its line terminator.
    """
    number_of, lines_of = {}, {}
    with open(filename) as file:
        for line in file:
            fields = line.split('\t')
            if len(fields) <= 2:
                continue
            number = fields[0]
            # When the gene id is the last column it still carries the line
            # terminator; strip it so lookups by gene id can actually match.
            geneid = fields[2].rstrip('\r\n')
            number_of[geneid] = number
            lines_of.setdefault(number, []).append([geneid, line])
    return number_of, lines_of
def load_data2(filename):
    """Map the first whitespace-separated token of each line to that line.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict whose keys are the first token of each non-blank line and
        whose values are the unmodified lines (line terminator included).
        If a token occurs on several lines, the last line wins.
    """
    line_of = {}
    with open(filename) as file:
        for line in file:
            tokens = line.split(maxsplit=1)
            if not tokens:
                # Blank or whitespace-only line: there is no key to index,
                # and indexing [0] would raise IndexError.
                continue
            line_of[tokens[0]] = line
    return line_of
def main():
    """Usage: p data1.txt data2.txt keys.txt

    For every gene id listed in keys.txt (first token of each line) that also
    appears in data1.txt, write two files into the system temporary
    directory:

    * ``<geneid>-one.txt``: all data1.txt lines that share the gene's number.
    * ``<geneid>-two.txt``: the data2.txt lines for the gene ids found on
      those data1.txt lines, skipping gene ids absent from data2.txt.

    Progress messages are printed to standard output. Exits with the usage
    message when fewer than three file arguments are given.
    """
    if len(sys.argv) < 4:
        sys.exit("Usage: p data1.txt data2.txt keys.txt")

    number_of, data1_lines_of = load_data1(sys.argv[1])
    data2_line_of = load_data2(sys.argv[2])
    out_dir = tempfile.gettempdir()

    with open(sys.argv[3]) as file:
        for line in file:
            tokens = line.split(maxsplit=1)
            if not tokens:
                # Blank line in the keys file: nothing to look up.
                continue
            geneid = tokens[0]
            if geneid not in number_of:
                continue
            # Every data1 entry that shares this gene's number.
            group = data1_lines_of[number_of[geneid]]

            print("Part one of " + geneid)
            path1 = os.path.join(out_dir, geneid) + "-one.txt"
            with open(path1, mode='w') as file1:
                file1.writelines(item[1] for item in group)

            print("Part two of " + geneid)
            path2 = os.path.join(out_dir, geneid) + "-two.txt"
            with open(path2, mode='w') as file2:
                file2.writelines(
                    data2_line_of[item[0]]
                    for item in group
                    if item[0] in data2_line_of
                )
            print("")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
|