# save_annoy.py

  1. from annoy import AnnoyIndex
  2. import random
  3. import pickle
  4. import json
  5. length = 100
  6. t = AnnoyIndex(length,metric="angular")
  7. index_url={}
  8. with open("article_topics") as f:
  9. lines=f.readlines()
  10. lines=[line.strip() for line in lines]
  11. for line in lines:
  12. data=json.loads(line)
  13. url=data["__url"]
  14. aid=data["article_id"]
  15. feature=[0 for _ in range(0,length)]
  16. index_url[aid]=url
  17. for index,score in data["topic_lda"]:
  18. feature[index]=score
  19. t.add_item(aid, feature)
  20. t.build(10)
  21. t.save('article.ann')
  22. with open("index_url","wb+") as f:
  23. pickle.dump(index_url, f)