|
@@ -54,6 +54,7 @@ for line in lines:
|
54
|
54
|
abstract = html.sub('', abstract, )
|
55
|
55
|
train.append([word for word in jieba.cut(abstract)])
|
56
|
56
|
article_info.append({"article_id":i,"__url":data[1]})
|
|
57
|
+print(article_info[:10])
|
57
|
58
|
lines=[]
|
58
|
59
|
print("解析结束")
|
59
|
60
|
dictionary = corpora.Dictionary(train)
|