Spaces:
Sleeping
Sleeping
TheAnsIs42
commited on
Commit
·
045f1d7
1
Parent(s):
28616fd
sort the dict, update function name
Browse filesFormer-commit-id: 5b410d8b2bc51ac057516c7a81c273849a60d6b2
- dict_util.py +13 -7
dict_util.py
CHANGED
@@ -4,33 +4,39 @@ import pickle
|
|
4 |
### NOTICE: the CSV only accepts two-column input, but accepts repeated input (the same term may be given more than once).
|
5 |
|
6 |
|
7 |
-
# 1_2_3, 1 is action, 2 is supply object, 3 is
|
8 |
-
def update_dict_csv(term_dict, f):
|
9 |
for rows in csv.reader(f):
|
10 |
word = rows[0].lower()
|
11 |
if word in term_dict:
|
12 |
if rows[1] not in term_dict[word]:
|
13 |
term_dict[word] = term_dict[word]+[rows[1]]
|
|
|
|
|
14 |
else:
|
15 |
term_dict[word]=[rows[1]]
|
|
|
16 |
pass
|
17 |
|
18 |
-
def
|
19 |
for key, val in term_dict.items():
|
20 |
csv.writer(f).writerow([key, val])
|
21 |
pass
|
22 |
|
23 |
-
def
|
24 |
pickle.dump(term_dict, f, pickle.HIGHEST_PROTOCOL)
|
25 |
pass
|
26 |
|
27 |
-
def
|
28 |
term_dict = pickle.load(pickle_f)
|
29 |
update_dict_csv(term_dict, csv_f)
|
30 |
-
#save to pickle file, highest
|
31 |
pickle.dump(term_dict, pickle_f, pickle.HIGHEST_PROTOCOL)
|
32 |
pass
|
33 |
|
|
|
|
|
|
|
34 |
|
35 |
#demo
|
36 |
term_dict_sc2 = {}
|
@@ -38,7 +44,7 @@ with open("./finetune_data/dict_enzh.csv", 'r', encoding='utf-8') as f:
|
|
38 |
update_dict_csv(term_dict_sc2,f)
|
39 |
|
40 |
with open("../test.csv", "w", encoding='utf-8') as w:
|
41 |
-
|
42 |
|
43 |
## for load pickle, just:
|
44 |
# pickle.load(f)
|
|
|
4 |
### NOTICE: the CSV only accepts two-column input, but accepts repeated input (the same term may be given more than once).
|
5 |
|
6 |
|
7 |
+
# 1_2_3, 1 is action, 2 is supply object, 3 is source object
|
8 |
+
def update_dict_csv(term_dict: dict, f):
    """Merge term/translation pairs read from a CSV stream into ``term_dict``.

    Every non-empty row supplies a term in column 0 (lower-cased before it is
    used as the key) and one translation in column 1.  A translation that is
    not yet recorded for its term is appended to that term's list; a
    duplicate is reported on stdout and otherwise ignored.

    After all rows are merged, ``term_dict`` is reordered *in place* using
    ``sort_dict`` (longest keys first), so the caller's dict object reflects
    the new order.

    Args:
        term_dict: mapping of lower-cased term -> list of translations.
            Mutated in place.
        f: iterable of CSV lines (for example an open text file), passed to
            ``csv.reader``.

    Raises:
        IndexError: if a non-empty row has fewer than two columns.
    """
    for rows in csv.reader(f):
        if not rows:
            # csv.reader yields an empty list for a blank line; there is
            # nothing to merge, and indexing it would raise IndexError.
            continue
        word = rows[0].lower()
        # Read column 1 before touching the dict so that a malformed row
        # cannot leave behind an empty entry.
        translation = rows[1]
        translations = term_dict.setdefault(word, [])
        if translation in translations:
            print("{},{}已存在".format(word, translation))
        else:
            translations.append(translation)

    # Assigning the sorted copy back to the parameter name would only rebind
    # the local variable and leave the caller's dict unsorted.  Refill the
    # original object instead so the new ordering is visible to the caller.
    ordered = sort_dict(term_dict)
    term_dict.clear()
    term_dict.update(ordered)
|
20 |
|
21 |
+
def export_csv_dict(term_dict: dict, f):
    """Write every entry of ``term_dict`` to ``f`` as a two-column CSV row.

    Column 0 holds the key and column 1 holds the value.  The value is passed
    to the ``csv`` module as a single field, so a non-string value (such as a
    list of translations) is written in its ``str()`` form.

    Args:
        term_dict: mapping whose items are exported, in iteration order.
        f: writable text file object handed to ``csv.writer``.
    """
    # Build the writer once instead of once per row.
    writer = csv.writer(f)
    writer.writerows([key, val] for key, val in term_dict.items())
|
25 |
|
26 |
+
def save_pickle_dict(term_dict: dict, f):
    """Serialize ``term_dict`` into ``f`` with the highest pickle protocol.

    Args:
        term_dict: object to pickle.
        f: writable binary file object that receives the pickle bytes.
    """
    pickler = pickle.Pickler(f, protocol=pickle.HIGHEST_PROTOCOL)
    pickler.dump(term_dict)
|
29 |
|
30 |
+
def update_pickel_csv(pickle_f, csv_f):
    """Load a pickled term dict, merge a CSV into it, and write it back.

    The dict is read from ``pickle_f``, updated with the rows of ``csv_f``
    via ``update_dict_csv``, and then pickled back into the same
    ``pickle_f`` object, replacing the previous contents.

    Args:
        pickle_f: binary file object that is readable, writable and seekable
            (for a real file, open it with mode ``"r+b"``).
        csv_f: iterable of CSV lines forwarded to ``update_dict_csv``.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling; only use this on pickle files from a trusted source.
    term_dict = pickle.load(pickle_f)
    update_dict_csv(term_dict, csv_f)

    # After loading, the stream position is at the end of the old pickle.
    # Dumping from there would append a second pickle behind the first, and a
    # later pickle.load would still return the stale data.  Rewind first.
    pickle_f.seek(0)
    # Save to the pickle file with the highest protocol (chosen by the
    # original author for performance).
    pickle.dump(term_dict, pickle_f, pickle.HIGHEST_PROTOCOL)
    # Drop any bytes of the previous pickle that extend past the new one.
    pickle_f.truncate()
|
36 |
|
37 |
+
def sort_dict(term_dict: dict):
    """Return a new dict whose entries are ordered by key length, longest first.

    Keys of equal length keep their original relative order.  The input dict
    is not modified.

    Args:
        term_dict: mapping whose keys support ``len()``.

    Returns:
        A new ``dict`` with the same items in the new order.
    """
    keys_longest_first = sorted(term_dict, key=len, reverse=True)
    return {key: term_dict[key] for key in keys_longest_first}
|
40 |
|
41 |
#demo
|
42 |
term_dict_sc2 = {}
|
|
|
44 |
update_dict_csv(term_dict_sc2,f)
|
45 |
|
46 |
with open("../test.csv", "w", encoding='utf-8') as w:
|
47 |
+
export_csv_dict(term_dict_sc2,w)
|
48 |
|
49 |
## to load the pickle, just:
|
50 |
# pickle.load(f)
|