刚才拿 Python 试了一下,感觉结果基本靠谱;点积、余弦之类的函数就直接用 NumPy 了:
- from numpy import (array, dot, arccos)
- from numpy.linalg import norm
- from random import shuffle
def to_vec(l, m):
    """Encode a sequence of 1-based labels as one flat one-hot vector.

    Each element ``e`` of ``l`` becomes a run of ``m`` zeros with a single 1
    at position ``e - 1``; the runs are concatenated in iteration order, so
    the result holds ``m`` entries per element of ``l``.

    Args:
        l: Iterable of integer labels, each expected to lie in ``1..m``.
        m: Length of each one-hot run (the number of distinct labels).

    Returns:
        A flat list of 0/1 ints.

    Raises:
        IndexError: If a label falls outside ``1..m``.
    """
    ret = []
    for e in l:
        # Without this check, labels in -m+1..0 would index from the end of
        # the run and silently produce the encoding of a different label.
        if not 1 <= e <= m:
            raise IndexError("label %r is outside 1..%r" % (e, m))
        sl = [0] * m
        sl[e - 1] = 1
        ret += sl
    return ret
def sim1(u, v):
    """Return the cosine similarity of vectors ``u`` and ``v``.

    Computed as ``dot(u, v) / norm(u) / norm(v)``.

    Args:
        u: First vector (any sequence accepted by ``numpy.dot``).
        v: Second vector, same length as ``u``.

    Returns:
        The cosine of the angle between ``u`` and ``v``, or ``0.0`` when
        either vector has zero norm (the angle is undefined there).
    """
    # Evaluate the dot product first so a shape mismatch still raises,
    # even when one of the inputs is a zero vector.
    product = dot(u, v)
    norm_u = norm(u)
    norm_v = norm(v)
    # A zero-norm operand would otherwise give 0/0 -> nan plus a
    # RuntimeWarning from NumPy.
    if norm_u == 0 or norm_v == 0:
        return 0.0
    return product / norm_u / norm_v
def sim(u, v, m):
    """Return the similarity of two label sequences.

    Both ``u`` and ``v`` are expanded with ``to_vec(..., m)`` and the
    resulting vectors are compared with ``sim1``.
    """
    return sim1(to_vec(u, m), to_vec(v, m))
def test(n):
    """Print one random trial for sequences of length ``n``.

    Builds the ordered list ``1..n`` and a shuffled copy of it, then prints
    both lists followed by ``=>`` and their ``sim`` score on a single line.

    Args:
        n: Length of the sequences; also passed to ``sim`` as the label count.
    """
    a = list(range(1, n + 1))
    b = list(range(1, n + 1))
    shuffle(b)
    # One parenthesised argument parses as a print statement on Python 2 and
    # as a function call on Python 3, producing the same space-separated line
    # that the original chain of trailing-comma prints emitted.
    print("%s %s => %s" % (a, b, sim(a, b, n)))
# Run ten random trials at each sequence length, shortest first.
for size in (5, 10):
    for _ in range(10):
        test(size)
输出结果:
- [1, 2, 3, 4, 5] [1, 3, 2, 5, 4] => 0.2
- [1, 2, 3, 4, 5] [2, 4, 5, 1, 3] => 0.0
- [1, 2, 3, 4, 5] [1, 3, 2, 4, 5] => 0.6
- [1, 2, 3, 4, 5] [3, 1, 5, 4, 2] => 0.2
- [1, 2, 3, 4, 5] [1, 4, 2, 5, 3] => 0.2
- [1, 2, 3, 4, 5] [3, 1, 5, 2, 4] => 0.0
- [1, 2, 3, 4, 5] [3, 5, 2, 4, 1] => 0.2
- [1, 2, 3, 4, 5] [1, 5, 4, 2, 3] => 0.2
- [1, 2, 3, 4, 5] [1, 4, 2, 3, 5] => 0.4
- [1, 2, 3, 4, 5] [4, 5, 3, 2, 1] => 0.2
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 5, 6, 4, 10, 8, 7, 9, 3, 1] => 0.2
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 9, 3, 5, 1, 8, 7, 6, 4, 10] => 0.3
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [5, 7, 4, 9, 3, 8, 6, 1, 2, 10] => 0.1
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [10, 4, 2, 6, 5, 1, 7, 9, 3, 8] => 0.2
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [7, 1, 8, 6, 4, 10, 2, 3, 9, 5] => 0.1
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [7, 3, 8, 5, 10, 6, 9, 1, 4, 2] => 0.1
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [10, 6, 8, 2, 9, 7, 4, 5, 1, 3] => 0.0
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 4, 5, 8, 1, 10, 6, 7, 3, 9] => 0.0
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [3, 4, 7, 9, 10, 1, 8, 6, 2, 5] => 0.0
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 9, 1, 7, 10, 6, 4, 8, 3, 5] => 0.2
复制代码 |