- 论坛徽章:
- 4
|
我的解决方案对应的代码如下:
其中,假定所有词都保存在"data.txt"文件中,每个词占一行。
# NOTE (from the accompanying post): main() passes 3 as the minimum number of
# occurrences for a keyword; replace that 3 with whatever threshold you need.
def addWordToTree(word, statistics):
    """
    Add a word element into statistics tree

    The word is walked back to front, so words that share a suffix share a
    path starting at the root of the tree.

    Args:
        word: The word to insert. An empty word leaves the tree untouched.
        statistics: The tree root, a dict mapping one character to a
            two-item list ``[count, children]``. ``count`` is how many
            inserted words passed through that node and ``children`` is a
            dict of the same shape. It is modified in place.

    Returns:
        None.
    """
    node = statistics
    for char in reversed(word):
        entry = node.get(char)
        if entry is None:
            # First time this character is seen at this depth.
            entry = node[char] = [0, {}]
        entry[0] += 1
        node = entry[1]
def readData(filename):
    """
    Read data from specified data file

    Every line is stripped of surrounding whitespace, decoded as UTF-8 and
    inserted into a fresh tree with addWordToTree.

    Args:
        filename: Path of the data file to read.

    Returns:
        The populated tree (a dict) built from all lines of the file.

    Raises:
        IOError/OSError: If the file cannot be opened.
        UnicodeDecodeError: If a line is not valid UTF-8.
    """
    result = {}
    # Binary mode keeps the explicit UTF-8 decode valid on both Python 2 and
    # Python 3; iterating the handle streams lines instead of loading the
    # whole file up front.
    with open(filename, "rb") as handle:
        for raw in handle:
            addWordToTree(raw.strip().decode("utf-8"), result)
    return result
def filterData(data, key, status, bar=4):
    """
    Filter built words tree with expected bar

    Walks the tree from the given level. While a node's count reaches
    ``bar`` its character is appended to ``key`` and the walk continues
    below it. Once a node's count falls under ``bar``, everything below
    that node is collected by buildResult under the key accumulated so far.

    Args:
        data: Current tree level, a dict mapping a character to
            ``[count, children]``.
        key: Characters accumulated on the way down, in tree order (the
            reverse of reading order).
        status: Dict that receives the results; modified in place.
        bar: Minimum count a node needs for its character to join the key.

    Returns:
        The ``status`` dict that was passed in.
    """
    for item in data:
        count, children = data[item]
        if count < bar:
            # Pass the child map itself so that a branch ending right here
            # (one character beyond the key) is recorded as well; stepping
            # one level further down first would silently drop it.
            buildResult(children, key, status, item)
        else:
            filterData(children, key + item, status, bar)
    return status


def buildResult(data, key, status, content):
    """
    Build filtered result, [::-1] is used to ensure key and value in correct direction

    Follows every branch below ``data``. Only when a node without children
    is reached is the accumulated ``content`` recorded, so a path that stops
    at a node which still has children produces no entry of its own.

    Args:
        data: Child map of the node reached so far.
        key: Accumulated key in tree order; reversed before use.
        status: Dict mapping a reversed key to a list of reversed contents;
            modified in place.
        content: Characters collected below the key, in tree order.

    Returns:
        None.
    """
    if not data:
        status.setdefault(key[::-1], []).append(content[::-1])
        return
    for item in data:
        buildResult(data[item][1], key, status, content + item)
def main():
    """
    Build the tree from "data.txt", filter it with a threshold of 3 and
    print every resulting key followed by its collected values.

    Output is UTF-8 encoded. Each key is preceded by a line of 80 "=" and
    followed by a line of 60 "-".
    """
    import sys

    data = readData("data.txt")

    result = {}
    filterData(data, "", result, 3)

    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout itself accepts encoded bytes. Writing bytes keeps the
    # output UTF-8 regardless of interpreter or locale.
    write = getattr(sys.stdout, "buffer", sys.stdout).write

    def emit(text):
        write(text.encode("utf-8") + b"\n")

    for key, value in result.items():
        emit(u"=" * 80)
        emit(key)
        emit(u"-" * 60)
        for item in value:
            emit(item)


if __name__ == "__main__":
    main()
复制代码 |
|