worldqwq committed on
Commit
c0481ec
·
1 Parent(s): 96fb84a

Added log.csv creation

Browse files

Former-commit-id: 736ed1428a98e09c934c9ab1e255e7b054d46548

Files changed (1) hide show
  1. SRT.py +16 -25
SRT.py CHANGED
@@ -3,6 +3,7 @@ from csv import reader
3
  from datetime import datetime
4
  import re
5
  import openai
 
6
  from collections import deque
7
 
8
  class SRT_segment(object):
@@ -62,16 +63,6 @@ class SRT_segment(object):
62
  def get_bilingual_str(self) -> str:
63
  return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
64
 
65
- # def set_translation(self, trans):
66
- # if trans[0] == ',':
67
- # trans = trans[1:]
68
- # self.translation = trans
69
-
70
- # def set_src_text(self, src_text):
71
- # if src_text[0] == ',':
72
- # src_text = src_text[1:]
73
- # self.source_text = src_text
74
-
75
  class SRT_script():
76
  def __init__(self, segments) -> None:
77
  self.segments = []
@@ -120,7 +111,7 @@ class SRT_script():
120
 
121
 
122
 
123
- def set_translation(self, translate:str, id_range:tuple, model):
124
  start_seg_id = id_range[0]
125
  end_seg_id = id_range[1]
126
 
@@ -140,6 +131,7 @@ class SRT_script():
140
  lines = translate.split('\n\n')
141
  if len(lines) < (end_seg_id - start_seg_id + 1):
142
  count = 0
 
143
  while count<5 and len(lines) != (end_seg_id - start_seg_id + 1):
144
 
145
  count += 1
@@ -167,6 +159,13 @@ class SRT_script():
167
  if len(lines) < (end_seg_id - start_seg_id + 1):
168
  print("Failed Solving unmatched lines, Manually parse needed")
169
 
 
 
 
 
 
 
 
170
  print(lines)
171
  #print(id_range)
172
  #for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
@@ -191,12 +190,8 @@ class SRT_script():
191
  #print(lines[i])
192
  pass
193
 
194
- def split_seg(self, seg, threshold):
195
- # evenly split seg to 2 parts and add new seg into self.segments
196
- if seg.source_text[:2] == ', ':
197
- seg.source_text = seg.source_text[2:]
198
- if seg.translation[0] == ',':
199
- seg.translation = seg.translation[1:]
200
  source_text = seg.source_text
201
  translation = seg.translation
202
  src_commas = [m.start() for m in re.finditer(',', source_text)]
@@ -205,10 +200,7 @@ class SRT_script():
205
  src_split_idx = src_commas[len(src_commas)//2] if len(src_commas) % 2 == 1 else src_commas[len(src_commas)//2 - 1]
206
  else:
207
  src_space = [m.start() for m in re.finditer(' ', source_text)]
208
- if len(src_space) > 0:
209
- src_split_idx = src_space[len(src_space)//2] if len(src_space) % 2 == 1 else src_space[len(src_space)//2 - 1]
210
- else:
211
- src_split_idx = 0
212
 
213
  if len(trans_commas) != 0:
214
  trans_split_idx = trans_commas[len(trans_commas)//2] if len(trans_commas) % 2 == 1 else trans_commas[len(trans_commas)//2 - 1]
@@ -250,9 +242,8 @@ class SRT_script():
250
  return result_list
251
 
252
 
253
- def check_len_and_split(self, threshold=30):
254
- # DEPRECATED
255
- # if sentence length >= threshold, split this segments to two
256
  segments = []
257
  for seg in self.segments:
258
  if len(seg.translation) > threshold:
@@ -266,7 +257,7 @@ class SRT_script():
266
  pass
267
 
268
  def check_len_and_split_range(self, range, threshold=30):
269
- # if sentence length >= threshold, split this segments to two
270
  start_seg_id = range[0]
271
  end_seg_id = range[1]
272
  extra_len = 0
 
3
  from datetime import datetime
4
  import re
5
  import openai
6
+ import os
7
  from collections import deque
8
 
9
  class SRT_segment(object):
 
63
  def get_bilingual_str(self) -> str:
64
  return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
65
 
 
 
 
 
 
 
 
 
 
 
66
  class SRT_script():
67
  def __init__(self, segments) -> None:
68
  self.segments = []
 
111
 
112
 
113
 
114
+ def set_translation(self, translate:str, id_range:tuple, model,vid_link=None):
115
  start_seg_id = id_range[0]
116
  end_seg_id = id_range[1]
117
 
 
131
  lines = translate.split('\n\n')
132
  if len(lines) < (end_seg_id - start_seg_id + 1):
133
  count = 0
134
+ solved = False
135
  while count<5 and len(lines) != (end_seg_id - start_seg_id + 1):
136
 
137
  count += 1
 
159
  if len(lines) < (end_seg_id - start_seg_id + 1):
160
  print("Failed Solving unmatched lines, Manually parse needed")
161
 
162
+ log_file = "log.csv"
163
+ log_exist = os.path.exists(log_file)
164
+ with open(log_file,"a") as log:
165
+ if not log_exist:
166
+ log.write("range_of_text,content_range,iterations_solving,solved,file_length,video_link")
167
+ log.write(range+','+range+','+count+','+solved+','+len(self.segments)+','+vid_link)
168
+
169
  print(lines)
170
  #print(id_range)
171
  #for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
 
190
  #print(lines[i])
191
  pass
192
 
193
+ def split_seg(self, seg, threshold=500):
194
+ # TODO: evenly split seg to 2 parts and add new seg into self.segments
 
 
 
 
195
  source_text = seg.source_text
196
  translation = seg.translation
197
  src_commas = [m.start() for m in re.finditer(',', source_text)]
 
200
  src_split_idx = src_commas[len(src_commas)//2] if len(src_commas) % 2 == 1 else src_commas[len(src_commas)//2 - 1]
201
  else:
202
  src_space = [m.start() for m in re.finditer(' ', source_text)]
203
+ src_split_idx = src_space[len(src_space)//2] if len(src_space) % 2 == 1 else src_space[len(src_space)//2 - 1]
 
 
 
204
 
205
  if len(trans_commas) != 0:
206
  trans_split_idx = trans_commas[len(trans_commas)//2] if len(trans_commas) % 2 == 1 else trans_commas[len(trans_commas)//2 - 1]
 
242
  return result_list
243
 
244
 
245
+ def check_len_and_split(self, threshold=30000):
246
+ # TODO: if translation length > threshold, split this segment into two
 
247
  segments = []
248
  for seg in self.segments:
249
  if len(seg.translation) > threshold:
 
257
  pass
258
 
259
  def check_len_and_split_range(self, range, threshold=30):
260
+ # TODO: if sentence length >= threshold, split this segment into two
261
  start_seg_id = range[0]
262
  end_seg_id = range[1]
263
  extra_len = 0