desarrolloasesoreslocales committed on
Commit
c5be12e
1 Parent(s): 371967d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -9
app.py CHANGED
@@ -29,18 +29,40 @@ def predict(payload):
29
 
30
  recorte_general = ""
31
 
32
- for chunk in cortar_en_bloques(payload, 90):
33
- if model.predict([chunk]).item() == 1:
34
- recorte_general += chunk + " "
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  recorte_final = ""
37
 
38
- for chunk in cortar_en_bloques(recorte_general, 84):
39
- if model.predict([chunk]).item() == 1:
40
- recorte_final += chunk + " "
41
-
42
- # results = model.predict_proba([payload])[0]
43
- # return "\n".join([f"Label: {id2label[i]}, Score: {round(result.item()*100, 2)} %" for i, result in enumerate(results)])
 
 
 
 
 
 
 
 
 
44
 
45
  return recorte_final
46
 
 
29
 
30
  recorte_general = ""
31
 
32
+ # Crear chunks
33
+ chunks = cortar_en_bloques(ocr_text, 150)
34
+ first = -1
35
+ margin = int(len(chunks) * 0.25)
36
+ chunks_removable = chunks[:margin] + chunks[-margin:]
37
+
38
+ for i in range(len(chunks)):
39
+ print('Recortando -', round((i/len(chunks))*100), '%')
40
+ if chunks[i] not in chunks_removable or trim_model.predict([chunks[i]]).item() == 1:
41
+ if first == -1:
42
+ first = i
43
+ recorte_general += chunks[i] + " "
44
+
45
+ if first > 0:
46
+ recorte_general = chunks[first-1] + recorte_general
47
+ print(100, '%')
48
 
49
  recorte_final = ""
50
 
51
+ # Definir tamaño de fragmentos de texto
52
+ # text_splitter2 = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0, length_function=len)
53
+ # Crear chunks
54
+ chunks2 = cortar_en_bloques(recorte_general, 80)
55
+ margin_s = int(len(chunks2) * 0.1)
56
+ margin_e = int(len(chunks2) * 0.1)
57
+ # if margin_s > 1:
58
+ chunks_removable2 = chunks2[:margin_s] + chunks2[-margin_e:]
59
+ # chunks_removable2 = chunks2[-margin_e:]
60
+
61
+ for i in range(len(chunks2)):
62
+ print('Recortando -', round((i/len(chunks2))*100), '%')
63
+ if chunks2[i] not in chunks_removable2 or trim_model.predict([chunks2[i]]).item() == 1:
64
+ recorte_final += chunks2[i] + " "
65
+ print(100, '%')
66
 
67
  return recorte_final
68