yufeng0528 4 jaren geleden
bovenliggende
commit
0a2c8dd9ad
1 gewijzigde bestanden met toevoegingen van 25 en 0 verwijderingen
  1. 25 0
      lda/save_annoy.py

+ 25 - 0
lda/save_annoy.py

@@ -0,0 +1,25 @@
"""Build an Annoy index from per-article LDA topic vectors.

Reads one JSON object per line from ``article_topics``, turns each
article's sparse ``topic_lda`` pairs into a dense vector of ``length``
entries, stores the vectors in an Annoy index saved as ``article.ann``
and pickles an ``article_id -> url`` lookup table to ``index_url``.
"""
import json
import pickle
import random

from annoy import AnnoyIndex

# Dimensionality of every vector stored in the index; topic indices read
# from the input must be smaller than this or the assignment below raises
# IndexError.
length = 100
t = AnnoyIndex(length, metric="angular")

# Lookup table from article_id to article URL, pickled next to the index.
index_url = {}

# Stream the input instead of materialising every line up front.
with open("article_topics") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines rather than crashing in json.loads("").
            continue

        data = json.loads(line)
        url = data["__url"]
        # NOTE(review): add_item needs a non-negative integer item id, so
        # article_id is presumably an int -- confirm against the producer
        # of "article_topics".
        aid = data["article_id"]
        index_url[aid] = url

        # "topic_lda" is consumed as (index, score) pairs; every position
        # that is not listed keeps its zero value.
        feature = [0] * length
        for index, score in data["topic_lda"]:
            feature[index] = score

        # Add the vector exactly once per article, after all of its topic
        # scores are filled in. The original line mixed spaces and a tab
        # here, which Python 3 rejects with TabError.
        t.add_item(aid, feature)

# Build the search forest with 10 trees, then persist the index.
t.build(10)
t.save('article.ann')

# pickle.dump writes bytes, so the target must be opened in binary mode
# (text mode raises TypeError on Python 3). Readers must use "rb".
with open("index_url", "wb") as f:
    pickle.dump(index_url, f)