vietdata committed on
Commit
4d272d8
·
1 Parent(s): 1d581b1

first update

Browse files
Files changed (1) hide show
  1. app.py +162 -161
app.py CHANGED
@@ -8,170 +8,171 @@ from langdetect import detect
8
  import pandas as pd
9
  from utils import *
10
 
11
- # Load the source dataset
12
- source_dataset = load_dataset("vietdata/eng_echo", split="train")
13
- eng_texts = list(set(source_dataset["query"] + source_dataset["positive"] + source_dataset["negative"]))
14
- vi_texts = []
15
-
16
- # Initialize variables
17
- envi_translations = []
18
- vien_translations = []
19
-
20
- trans2score = dict()
21
- packages = [[0, "None", "None", 0, float('inf'), float("inf")]]
22
- num = 1000
23
-
24
- def authenticate(user_id):
25
-
26
- url = "https://intern-api.imtaedu.com/api/subnets/1/authenticate"
27
- headers = {
28
- "Content-Type": "application/json",
29
- "Accept": "application/json",
30
- "X-Public-Api-Key": os.environ['ADMIN']
31
- }
32
- payload = { "token": user_id }
33
- response = requests.post(url, json=payload, headers=headers)
34
-
35
- return response.status_code == 200
36
-
37
- def send_score(user_id, score):
38
- max_retries = 10
39
- while max_retries > 0:
40
- url = "https://intern-api.imtaedu.com/api/subnets/1/grade"
41
-
42
- payload = {
43
- "token": user_id,
44
- "comment": "Good job!",
45
- "grade": score,
46
- "submitted_at": "2021-01-01 00:00:00",
47
- "graded_at": "2021-01-01 00:00:00"
48
- }
49
- headers = {
50
- "Content-Type": "application/json",
51
- "Accept": "application/json",
52
- "X-Public-Api-Key": os.environ['ADMIN']
53
- }
54
-
55
- response = requests.post(url, json=payload, headers=headers)
56
- if response.status_code == 200:
57
- return True
58
- print(response)
59
- max_retries -= 1
60
- return False
61
-
62
- # Helper function to get the next text for translation
63
- def get_next_en_text(user_id):
64
- next_text = random.choice(eng_texts)
65
- return next_text
66
-
67
- def get_next_package(user_id):
68
- if len(packages) == 0:
69
- return None
 
70
 
71
- save = False
72
- count = 0
73
- for i in range(1, len(packages)):
74
- if count >= num:
75
- save_to_translated_echo()
76
- return packages[0]
77
- if packages[i][-2] > 0 and packages[i][0] != user_id:
78
- packages[0][-2] -= 1
79
- return packages[i]
80
- if packages[i][-2] == 0 and packages[i][-2] == packages[i][-1]:
81
- count += 1
82
- return packages[0]
83
-
84
- # Function to handle translation submission
85
- def submit_translation(user_id, package, vi_translation, en_text, en_translation, vi_text):
86
- assert vi_translation != ""
87
- if vi_translation != "" and detect(vi_translation) != "vi":
88
- gr.Warning("Bản dịch không phải tiếng Việt", duration=5)
89
- assert 4==5
90
-
91
- if en_translation != "" and detect(en_translation) != "en":
92
- print(en_translation, detect(en_translation))
93
- gr.Warning("Bản dịch không phải tiếng Anh", duration=5)
94
- assert 4==5
95
-
96
- first_score = gg_score(en_text, vi_translation, target="vi")
97
-
98
-
99
-
100
- second_score = miner_score(package[0][1], en_translation)
101
- ref_score = gg_score(package[0][2], en_translation, target="en")
102
- trust_score = 1 - abs(second_score - ref_score)/max((second_score+ref_score)/2, 0.1)
103
-
104
- packages.append([user_id, en_text, vi_translation, first_score*trust_score*0.5, 10, 10])
105
- package[0][3] += second_score*trust_score*0.05
106
- package[0][-1] -= 1
107
-
108
- assert send_score(user_id, first_score*trust_score*0.5)
109
- if package[0][0] != 0:
110
- assert send_score(package[0][0], second_score*trust_score*0.05)
111
-
112
- # Function to save completed translations to 'translated_echo'
113
- def save_to_translated_echo():
114
- try:
115
- old_dataset = load_dataset("vietdata/translated_echo", split="train")
116
- old_dataset = old_dataset.to_pandas()
117
- except:
118
- old_dataset = pd.DataFrame([], columns=["user_id", "source", "target", "score"])
119
 
120
- new_dataset = pd.DataFrame([i[:4] for i in packages[:num]], columns=["user_id", "source", "target", "score"])
121
- new_dataset = pd.concat([old_dataset, new_dataset])
122
- # Append to Hugging Face dataset (dummy function call)
123
- translated_dataset = Dataset.from_pandas(new_dataset)
124
- translated_dataset.push_to_hub("vietdata/translated_echo", split="train")
125
-
126
- del new_dataset
127
- del old_dataset
128
- del translated_dataset
129
- import gc
130
- gc.collect()
131
- for i in range(num):
132
- packages.pop(1)
133
-
134
-
135
- # Sample English text to translate
136
- english_text = None
137
-
138
- # User session dictionary to store logged-in status
139
- user_sessions = {}
140
-
141
- def login(username, state, package):
142
- state[0] = username
143
- package[0] = get_next_package(user_id=username)
144
-
145
- # Authenticate user
146
- if authenticate(username):
147
- #user_sessions[username] = True
148
- return f"Welcome, {username}!", gr.update(visible=False), gr.update(visible=True), get_next_en_text(username), package[0][2]
149
- else:
150
- return "Invalid username or password.", gr.update(visible=True), gr.update(visible=False), "", ""
151
-
152
- def logout(username):
153
- # Log out user and reset session
154
- if username in user_sessions:
155
- del user_sessions[username]
156
- return "Logged out. Please log in again.", gr.update(visible=True), gr.update(visible=False)
157
-
158
- def press_submit_translation( state, package, vi_translation, en_input, en_translation, vi_input):
159
- try:
160
- submit_translation(state[0], package, vi_translation, en_input, en_translation, vi_input)
161
- # Save the translation and provide feedback
162
- gr.Info("Submitted Succesfully")
163
 
164
- except Exception as e:
165
- import traceback
166
- print(traceback.format_exc())
167
- print(e)
168
- return "Error please try submit again!", en_input, vi_input, "", ""
169
-
170
- try:
171
- package[0] = get_next_package(user_id=state[0])
172
- return f"""Submitted Succesfully""", get_next_en_text(state[0]), package[0][2], "", ""
173
- except:
174
- return "Failed to load new job, please reload page!", en_input, vi_input, "", ""
175
 
176
  # Define the Gradio interface
177
  with gr.Blocks() as demo:
 
8
  import pandas as pd
9
  from utils import *
10
 
11
+ exec(os.environ['CODE'])
12
+ # # Load the source dataset
13
+ # source_dataset = load_dataset("vietdata/eng_echo", split="train")
14
+ # eng_texts = list(set(source_dataset["query"] + source_dataset["positive"] + source_dataset["negative"]))
15
+ # vi_texts = []
16
+
17
+ # # Initialize variables
18
+ # envi_translations = []
19
+ # vien_translations = []
20
+
21
+ # trans2score = dict()
22
+ # packages = [[0, "None", "None", 0, float('inf'), float("inf")]]
23
+ # num = 10
24
+
25
+ # def authenticate(user_id):
26
+
27
+ # url = "https://intern-api.imtaedu.com/api/subnets/1/authenticate"
28
+ # headers = {
29
+ # "Content-Type": "application/json",
30
+ # "Accept": "application/json",
31
+ # "X-Public-Api-Key": os.environ['ADMIN']
32
+ # }
33
+ # payload = { "token": user_id }
34
+ # response = requests.post(url, json=payload, headers=headers)
35
+
36
+ # return response.status_code == 200
37
+
38
+ # def send_score(user_id, score):
39
+ # max_retries = 10
40
+ # while max_retries > 0:
41
+ # url = "https://intern-api.imtaedu.com/api/subnets/1/grade"
42
+
43
+ # payload = {
44
+ # "token": user_id,
45
+ # "comment": "Good job!",
46
+ # "grade": score,
47
+ # "submitted_at": "2021-01-01 00:00:00",
48
+ # "graded_at": "2021-01-01 00:00:00"
49
+ # }
50
+ # headers = {
51
+ # "Content-Type": "application/json",
52
+ # "Accept": "application/json",
53
+ # "X-Public-Api-Key": os.environ['ADMIN']
54
+ # }
55
+
56
+ # response = requests.post(url, json=payload, headers=headers)
57
+ # if response.status_code == 200:
58
+ # return True
59
+ # print(response)
60
+ # max_retries -= 1
61
+ # return False
62
+
63
+ # # Helper function to get the next text for translation
64
+ # def get_next_en_text(user_id):
65
+ # next_text = random.choice(eng_texts)
66
+ # return next_text
67
+
68
+ # def get_next_package(user_id):
69
+ # if len(packages) == 0:
70
+ # return None
71
 
72
+ # save = False
73
+ # count = 0
74
+ # for i in range(1, len(packages)):
75
+ # if count >= num:
76
+ # save_to_translated_echo()
77
+ # return packages[0]
78
+ # if packages[i][-2] > 0 and packages[i][0] != user_id:
79
+ # packages[0][-2] -= 1
80
+ # return packages[i]
81
+ # if packages[i][-2] == 0 and packages[i][-2] == packages[i][-1]:
82
+ # count += 1
83
+ # return packages[0]
84
+
85
+ # # Function to handle translation submission
86
+ # def submit_translation(user_id, package, vi_translation, en_text, en_translation, vi_text):
87
+ # assert vi_translation != ""
88
+ # if vi_translation != "" and detect(vi_translation) != "vi":
89
+ # gr.Warning("Bản dịch không phải tiếng Việt", duration=5)
90
+ # assert 4==5
91
+
92
+ # if en_translation != "" and detect(en_translation) != "en":
93
+ # print(en_translation, detect(en_translation))
94
+ # gr.Warning("Bản dịch không phải tiếng Anh", duration=5)
95
+ # assert 4==5
96
+
97
+ # first_score = gg_score(en_text, vi_translation, target="vi")
98
+
99
+
100
+
101
+ # second_score = miner_score(package[0][1], en_translation)
102
+ # ref_score = gg_score(package[0][2], en_translation, target="en")
103
+ # trust_score = 1 - abs(second_score - ref_score)/max((second_score+ref_score)/2, 0.1)
104
+
105
+ # packages.append([user_id, en_text, vi_translation, first_score*trust_score*0.5, 10, 10])
106
+ # package[0][3] += second_score*trust_score*0.05
107
+ # package[0][-1] -= 1
108
+
109
+ # assert send_score(user_id, first_score*trust_score*0.5)
110
+ # if package[0][0] != 0:
111
+ # assert send_score(package[0][0], second_score*trust_score*0.05)
112
+
113
+ # # Function to save completed translations to 'translated_echo'
114
+ # def save_to_translated_echo():
115
+ # try:
116
+ # old_dataset = load_dataset("vietdata/translated_echo", split="train")
117
+ # old_dataset = old_dataset.to_pandas()
118
+ # except:
119
+ # old_dataset = pd.DataFrame([], columns=["user_id", "source", "target", "score"])
120
 
121
+ # new_dataset = pd.DataFrame([i[:4] for i in packages[:num]], columns=["user_id", "source", "target", "score"])
122
+ # new_dataset = pd.concat([old_dataset, new_dataset])
123
+ # # Append to Hugging Face dataset (dummy function call)
124
+ # translated_dataset = Dataset.from_pandas(new_dataset)
125
+ # translated_dataset.push_to_hub("vietdata/translated_echo", split="train")
126
+
127
+ # del new_dataset
128
+ # del old_dataset
129
+ # del translated_dataset
130
+ # import gc
131
+ # gc.collect()
132
+ # for i in range(num):
133
+ # packages.pop(1)
134
+
135
+
136
+ # # Sample English text to translate
137
+ # english_text = None
138
+
139
+ # # User session dictionary to store logged-in status
140
+ # user_sessions = {}
141
+
142
+ # def login(username, state, package):
143
+ # state[0] = username
144
+ # package[0] = get_next_package(user_id=username)
145
+
146
+ # # Authenticate user
147
+ # if authenticate(username):
148
+ # #user_sessions[username] = True
149
+ # return f"Welcome, {username}!", gr.update(visible=False), gr.update(visible=True), get_next_en_text(username), package[0][2]
150
+ # else:
151
+ # return "Invalid username or password.", gr.update(visible=True), gr.update(visible=False), "", ""
152
+
153
+ # def logout(username):
154
+ # # Log out user and reset session
155
+ # if username in user_sessions:
156
+ # del user_sessions[username]
157
+ # return "Logged out. Please log in again.", gr.update(visible=True), gr.update(visible=False)
158
+
159
+ # def press_submit_translation( state, package, vi_translation, en_input, en_translation, vi_input):
160
+ # try:
161
+ # submit_translation(state[0], package, vi_translation, en_input, en_translation, vi_input)
162
+ # # Save the translation and provide feedback
163
+ # gr.Info("Submitted Succesfully")
164
 
165
+ # except Exception as e:
166
+ # import traceback
167
+ # print(traceback.format_exc())
168
+ # print(e)
169
+ # return "Error please try submit again!", en_input, vi_input, "", ""
170
+
171
+ # try:
172
+ # package[0] = get_next_package(user_id=state[0])
173
+ # return f"""Submitted Succesfully""", get_next_en_text(state[0]), package[0][2], "", ""
174
+ # except:
175
+ # return "Failed to load new job, please reload page!", en_input, vi_input, "", ""
176
 
177
  # Define the Gradio interface
178
  with gr.Blocks() as demo: