Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -97,14 +97,72 @@ def fine_tune_model():
|
|
97 |
|
98 |
st.session_state.model.eval()
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
# Initialize session state
|
101 |
if 'model_loaded' not in st.session_state:
|
102 |
st.session_state.tokenizer = T5Tokenizer.from_pretrained("t5-base")
|
103 |
st.session_state.model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
104 |
st.session_state.model_loaded = True
|
105 |
-
|
106 |
-
# Rest of your existing functions (clean_generated_text and humanize_text remain the same)
|
107 |
-
[Previous clean_generated_text and humanize_text functions remain unchanged]
|
108 |
|
109 |
# UI Components
|
110 |
st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
|
|
|
97 |
|
98 |
st.session_state.model.eval()
|
99 |
|
100 |
+
def clean_generated_text(text):
    """Strip echoed instruction prefixes from generated text and capitalize it.

    Leading command phrases (matched case-insensitively) are removed
    repeatedly until none is left, so stacked prefixes are handled in any
    order. Whitespace and separator punctuation that followed a removed
    prefix are dropped as well. A prefix is only removed when it is not
    merely the beginning of a longer word.

    Args:
        text: Raw decoded model output.

    Returns:
        The cleaned text with its first character upper-cased, or an empty
        string when nothing remains.
    """
    # Command prefixes to remove (runtime strings, kept in Portuguese).
    prefixes = (
        "reescreva o seguinte texto",
        "reescreva este texto",
        "reescreva o texto",
        "traduza",
        "humanize:",
        "humanizar:",
        "em português",
        "de forma mais natural",
    )
    # Characters that may separate a command prefix from the real content.
    separators = " \t\n\r\f\v:;,-–—"

    text = text.strip()

    removed = True
    while removed:
        removed = False
        for prefix in prefixes:
            size = len(prefix)
            # Compare against the exact slice that would be cut, so the cut
            # length is always right even if lower-casing changes lengths.
            if text[:size].lower() != prefix:
                continue
            rest = text[size:]
            # Do not cut a prefix that is only the start of a longer word
            # (e.g. "traduza" in "traduzamos").
            if prefix[-1].isalnum() and rest[:1].isalnum():
                continue
            text = rest.lstrip(separators)
            removed = True
            break  # restart the scan: an earlier prefix may now match

    # Capitalize the first letter.
    if text:
        text = text[0].upper() + text[1:]

    return text
|
128 |
+
|
129 |
+
def humanize_text(text):
    """Rewrite ``text`` with the session's T5 model and return the cleaned result.

    The text is wrapped in a Portuguese rewrite instruction, tokenized with
    ``st.session_state.tokenizer`` (truncated to 1024 tokens), passed to
    ``st.session_state.model.generate`` and the decoded output is run through
    ``clean_generated_text``. If the result has fewer whitespace-separated
    words than the input, the missing number of words is copied from the end
    of the input so the output never has fewer words than the input.

    Args:
        text: Source text to rewrite.

    Returns:
        The rewritten text, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    words = text.split()
    if not words:
        # Nothing to rewrite: skip the model call, which would otherwise
        # generate output from the bare instruction alone.
        return ""

    max_tokens = 1024
    word_count = len(words)

    prompt = f"reescreva em português natural, mantendo todas as informações: {text}"

    input_ids = st.session_state.tokenizer(
        prompt,
        return_tensors="pt",
        max_length=max_tokens,
        truncation=True,
    ).input_ids

    # Parameters tuned for better coherence.
    outputs = st.session_state.model.generate(
        input_ids,
        max_length=max_tokens,
        # The word count is used as a rough lower bound on output length; it
        # is clamped so the minimum can never exceed the maximum.
        min_length=min(word_count, max_tokens),
        do_sample=True,
        temperature=0.1,
        top_p=0.95,
        num_beams=3,
        repetition_penalty=1.2,
        length_penalty=2.0,
    )
    result = st.session_state.tokenizer.decode(outputs[0], skip_special_tokens=True)
    result = clean_generated_text(result)

    # Guarantee the minimum size: top up with the trailing words of the
    # input when the generated text came out shorter.
    missing = word_count - len(result.split())
    if missing > 0:
        result += " " + " ".join(words[-missing:])

    return result
|
160 |
+
|
161 |
# Load the t5-base tokenizer and model only when the 'model_loaded' marker is
# absent from the session state, then record that loading has happened.
if 'model_loaded' not in st.session_state:
    session = st.session_state
    session.tokenizer = T5Tokenizer.from_pretrained("t5-base")
    session.model = T5ForConditionalGeneration.from_pretrained("t5-base")
    session.model_loaded = True
|
|
|
|
|
|
|
166 |
|
167 |
# UI Components
|
168 |
st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
|