BugZoid committed
Commit 104c1bb · verified · 1 Parent(s): aeb2715

Update app.py

Files changed (1)
  1. app.py +61 -3
app.py CHANGED
@@ -97,14 +97,72 @@ def fine_tune_model():
 
     st.session_state.model.eval()
 
+def clean_generated_text(text):
+    """Removes command prefixes and cleans up the generated text"""
+    text = text.strip()
+
+    # List of command prefixes to remove
+    prefixes = [
+        "reescreva o seguinte texto",
+        "reescreva este texto",
+        "reescreva o texto",
+        "traduza",
+        "humanize:",
+        "humanizar:",
+        "em português",
+        "de forma mais natural"
+    ]
+
+    # Strip the command prefixes
+    text_lower = text.lower()
+    for prefix in prefixes:
+        if text_lower.startswith(prefix):
+            text = text[len(prefix):].strip()
+            text_lower = text.lower()
+
+    # Capitalize the first letter
+    if text:
+        text = text[0].upper() + text[1:]
+
+    return text
+
+def humanize_text(text):
+    """Humanizes the text while preserving coherence and length"""
+    prompt = f"reescreva em português natural, mantendo todas as informações: {text}"
+
+    input_ids = st.session_state.tokenizer(
+        prompt,
+        return_tensors="pt",
+        max_length=1024,
+        truncation=True
+    ).input_ids
+
+    # Generation parameters tuned for better coherence
+    outputs = st.session_state.model.generate(
+        input_ids,
+        max_length=1024,
+        min_length=len(text.split()),
+        do_sample=True,
+        temperature=0.1,
+        top_p=0.95,
+        num_beams=3,
+        repetition_penalty=1.2,
+        length_penalty=2.0
+    )
+    result = st.session_state.tokenizer.decode(outputs[0], skip_special_tokens=True)
+    result = clean_generated_text(result)
+
+    # Ensure the result is at least as long as the input
+    while len(result.split()) < len(text.split()):
+        result += " " + " ".join(text.split()[-(len(text.split()) - len(result.split())):])
+
+    return result
+
 # Initialize session state
 if 'model_loaded' not in st.session_state:
     st.session_state.tokenizer = T5Tokenizer.from_pretrained("t5-base")
     st.session_state.model = T5ForConditionalGeneration.from_pretrained("t5-base")
     st.session_state.model_loaded = True
-
-# Rest of your existing functions (clean_generated_text and humanize_text remain the same)
-[Previous clean_generated_text and humanize_text functions remain unchanged]
 
 # UI Components
 st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
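
For reference, a minimal sketch of exercising the same generation call outside the Streamlit app. This is an illustration, not code from the commit: it assumes transformers and sentencepiece are installed and the t5-base checkpoint can be downloaded, and it replaces the app's st.session_state lookups with local variables.

    # Standalone sketch (assumption: transformers + sentencepiece available, t5-base reachable).
    from transformers import T5Tokenizer, T5ForConditionalGeneration

    tokenizer = T5Tokenizer.from_pretrained("t5-base")
    model = T5ForConditionalGeneration.from_pretrained("t5-base")
    model.eval()

    text = "o relatório foi entregue ontem pela equipe"  # sample input, for illustration only
    prompt = f"reescreva em português natural, mantendo todas as informações: {text}"
    input_ids = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).input_ids

    # Same settings as humanize_text: do_sample=True with num_beams=3 selects
    # beam-sample decoding in transformers; temperature=0.1 keeps it close to
    # deterministic beam search.
    outputs = model.generate(
        input_ids,
        max_length=1024,
        min_length=len(text.split()),
        do_sample=True,
        temperature=0.1,
        top_p=0.95,
        num_beams=3,
        repetition_penalty=1.2,
        length_penalty=2.0,
    )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The low temperature combined with beam search reflects the "better coherence" intent of the tuned parameters: sampling is enabled but stays close to the most likely beam.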