|
@@ -7,10 +7,10 @@ query_articles_sql = '''
|
7
|
7
|
select a.aid,a.title,a.cities,a.cid,a.other_info,"array_agg"(t.tag_value),days,a.recom,a."rank",read_count
|
8
|
8
|
from articles a
|
9
|
9
|
LEFT JOIN article_tags t on a.aid = t.aid
|
10
|
|
-where a.crt_time > '2018-01-01' and a.atype = '0'
|
|
10
|
+where %s and a.atype = '0'
|
11
|
11
|
and a.stock_aid is NULL
|
12
|
12
|
GROUP BY a.aid
|
13
|
|
-limit 100 OFFSET 0
|
|
13
|
+limit 200 OFFSET 0
|
14
|
14
|
'''
|
15
|
15
|
|
16
|
16
|
query_sell_main_sql = '''
|
|
@@ -42,8 +42,8 @@ and status in (1,2)
|
42
|
42
|
'''
|
43
|
43
|
|
44
|
44
|
|
45
|
|
-def get_articles():
|
46
|
|
- rows = pgsql_util.get_rows(query_articles_sql)
|
|
45
|
+def get_articles(filter):
|
|
46
|
+ rows = pgsql_util.get_rows(query_articles_sql % (filter))
|
47
|
47
|
|
48
|
48
|
attr_list = []
|
49
|
49
|
for row in rows:
|
|
@@ -93,7 +93,7 @@ def to_a1(attr_list):
|
93
|
93
|
dtype = attr['dtype']
|
94
|
94
|
recruit = attr['recruit']
|
95
|
95
|
country = attr['country']
|
96
|
|
- price = attr['price']
|
|
96
|
+ price = attr['price'].replace('起', '')
|
97
|
97
|
days = attr['days']
|
98
|
98
|
recom = attr['recom']
|
99
|
99
|
rank = attr['rank']
|
|
@@ -146,14 +146,25 @@ def to_list_attr(item, a_list):
|
146
|
146
|
return c_list
|
147
|
147
|
|
148
|
148
|
|
149
|
|
-def to_file(data_list):
|
150
|
|
- with open("train_data", "w") as f:
|
|
149
|
+def to_file(data_list, file_name):
|
|
150
|
+ with open(file_name, "w") as f:
|
151
|
151
|
for line in data_list:
|
152
|
152
|
line = [line[:-2], [line[-1]]]
|
153
|
153
|
f.write(str(line) + "\n")
|
154
|
154
|
|
155
|
155
|
|
156
|
|
-if __name__ == '__main__':
|
157
|
|
- attr_list = get_articles()
|
|
156
|
+def ge_train():
|
|
157
|
+ attr_list = get_articles("a.crt_time > '2018-01-01' ")
|
|
158
|
+ new_attr_list = to_a1(attr_list)
|
|
159
|
+ to_file(new_attr_list, "train_data")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+def ge_test():
|
|
163
|
+ attr_list = get_articles("a.crt_time > '2019-09-01' ")
|
158
|
164
|
new_attr_list = to_a1(attr_list)
|
159
|
|
- to_file(new_attr_list)
|
|
165
|
+ to_file(new_attr_list, "test_data")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+if __name__ == '__main__':
|
|
169
|
+ ge_train()
|
|
170
|
+ ge_test()
|