agent-comment

Sleeping

App Files Files Community

agent-comment / app.py

BugZoid

Update app.py

223938e verified 3 months ago

raw

history blame

5.46 kB

	import streamlit as st
	from transformers import (
	AutoTokenizer,
	AutoModelForSeq2SeqLM,
	T5ForConditionalGeneration,
	T5Tokenizer
	)

	# Load both tokenizer/model pairs only when the 'models_loaded' flag is absent
	# from the Streamlit session state, then set the flag.
	if "models_loaded" not in st.session_state:
	    state = st.session_state
	    t5_checkpoint = "t5-base"
	    # NOTE(review): this checkpoint is published as a summarization model;
	    # confirm it is the intended model for the paraphrasing pass.
	    bart_checkpoint = "facebook/bart-large-cnn"

	    # T5 pair, read by humanize_text()
	    state.t5_tokenizer = T5Tokenizer.from_pretrained(t5_checkpoint)
	    state.t5_model = T5ForConditionalGeneration.from_pretrained(t5_checkpoint)

	    # BART pair, read by paraphrase_text()
	    state.paraphrase_tokenizer = AutoTokenizer.from_pretrained(bart_checkpoint)
	    state.paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(bart_checkpoint)

	    state.models_loaded = True

	def ensure_minimum_length(text, original_text):
	    """Pad ``text`` so it has at least as many words as ``original_text``.

	    Words are whitespace-separated tokens as produced by ``str.split()``.
	    When ``text`` is shorter, the last missing words of ``original_text`` are
	    appended, separated by single spaces.

	    Args:
	        text: Generated text to check.
	        original_text: Reference text whose word count is the minimum.

	    Returns:
	        ``text`` unchanged when it already has enough words, otherwise
	        ``text`` followed by the trailing words of ``original_text``.
	    """
	    original_words = original_text.split()
	    missing_words = len(original_words) - len(text.split())
	    if missing_words <= 0:
	        return text

	    # Slice the word list, not the raw string: slicing characters can append
	    # only whitespace, which adds no words and never closes the gap.
	    padding = " ".join(original_words[-missing_words:])
	    return f"{text} {padding}" if text else padding

	def paraphrase_text(text, original_text):
	    """Paraphrase ``text`` with the BART model held in the Streamlit session.

	    Args:
	        text: Text to feed to the paraphrasing model.
	        original_text: Reference text; its whitespace-separated word count is
	            used as the minimum generation length and as the padding target.

	    Returns:
	        The decoded model output, passed through ``ensure_minimum_length`` so
	        it has at least as many words as ``original_text``.
	    """
	    max_length = 1024
	    # Clamp the floor to the ceiling: a very long original would otherwise
	    # ask generate() for a min_length greater than max_length.
	    # NOTE: generate() counts tokens, so the word count is only an approximation.
	    min_length = min(len(original_text.split()), max_length)

	    tokenizer = st.session_state.paraphrase_tokenizer
	    model = st.session_state.paraphrase_model

	    inputs = tokenizer.encode(
	        text,
	        return_tensors="pt",
	        max_length=max_length,
	        truncation=True,
	    )

	    outputs = model.generate(
	        inputs,
	        max_length=max_length,
	        min_length=min_length,  # at least as long as the original
	        do_sample=True,
	        temperature=0.3,
	        top_p=0.95,
	        repetition_penalty=1.2,
	        length_penalty=2.0,  # raised to favour longer outputs
	    )

	    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
	    return ensure_minimum_length(result, original_text)

	def humanize_text(text):
	    """Rewrite ``text`` with the T5 model held in the Streamlit session.

	    The input is wrapped in a Portuguese instruction prompt asking for a more
	    natural rewrite that keeps all information.

	    Args:
	        text: Text to rewrite; its whitespace-separated word count is used as
	            the minimum generation length and as the padding target.

	    Returns:
	        The decoded model output, passed through ``ensure_minimum_length`` so
	        it has at least as many words as ``text``.
	    """
	    max_length = 1024
	    # Clamp the floor to the ceiling: a very long input would otherwise
	    # ask generate() for a min_length greater than max_length.
	    # NOTE: generate() counts tokens, so the word count is only an approximation.
	    min_length = min(len(text.split()), max_length)

	    prompt = (
	        "reescreva o seguinte texto em português de forma mais natural e humana, "
	        f"mantendo todas as informações e expandindo com detalhes relevantes: {text}"
	    )

	    tokenizer = st.session_state.t5_tokenizer
	    model = st.session_state.t5_model

	    input_ids = tokenizer(
	        prompt,
	        return_tensors="pt",
	        max_length=max_length,
	        truncation=True,
	    ).input_ids

	    outputs = model.generate(
	        input_ids,
	        max_length=max_length,
	        min_length=min_length,  # at least as long as the original
	        do_sample=True,
	        temperature=0.3,
	        top_p=0.95,
	        num_beams=5,
	        no_repeat_ngram_size=3,
	        repetition_penalty=1.2,
	        length_penalty=2.0,  # raised to favour longer outputs
	    )

	    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
	    return ensure_minimum_length(result, text)

	# Page setup: browser tab metadata, title and introductory text
	st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
	st.title("🤖 → 🧑 Humanizador de Texto Avançado")

	intro_markdown = """
	Este aplicativo transforma textos robotizados em linguagem mais natural e humana,
	mantendo todas as informações originais e garantindo que o texto final seja pelo menos
	do mesmo tamanho que o original.
	"""
	st.markdown(intro_markdown)

	# Main text box; its content drives the rest of the script
	input_help = "Cole seu texto aqui para transformá-lo em uma versão mais natural e humana."
	input_text = st.text_area("Cole seu texto de robô aqui:", height=150, help=input_help)

	# Advanced settings in sidebar
	# Two checkboxes: `use_paraphrase` (default True) gates the paraphrasing pass and
	# `show_original` (default False) gates echoing the input next to the result.
	with st.sidebar:
	st.header("Configurações Avançadas")
	use_paraphrase = st.checkbox("Ativar Paráfrase", value=True)
	show_original = st.checkbox("Mostrar Texto Original", value=False)

	# Show the word count of the input text once something has been entered.
	# NOTE(review): indentation is not preserved in this copy, so it is unclear
	# whether this block renders inside the sidebar or on the main page; confirm
	# against the original file.
	if input_text:
	st.write("Informações do texto:")
	st.write(f"Palavras no original: {len(input_text.split())}")

	# Run the pipeline when the button is pressed; any failure is reported in the UI.
	# NOTE(review): nesting was reconstructed from the try/except and if/else pairing;
	# the original-text lines are assumed to sit under `show_original`.
	if st.button("Humanizar", type="primary"):
	    if input_text:
	        with st.spinner("Processando o texto..."):
	            try:
	                # Pass 1: T5 rewrite
	                humanized_text = humanize_text(input_text)

	                # Pass 2 (optional): BART paraphrase, sized against the raw input
	                final_text = (
	                    paraphrase_text(humanized_text, input_text)
	                    if use_paraphrase
	                    else humanized_text
	                )

	                # Render the outcome, optionally preceded by the original text
	                st.success("✨ Texto humanizado:")
	                if show_original:
	                    st.text("Texto original:")
	                    st.info(input_text)
	                    st.write(f"Palavras no original: {len(input_text.split())}")
	                st.markdown("Resultado:")
	                st.write(final_text)
	                st.write(f"Palavras no resultado: {len(final_text.split())}")

	            except Exception as err:
	                st.error(f"❌ Ocorreu um erro durante o processamento: {err}")
	    else:
	        st.warning("⚠️ Por favor, cole um texto de robô primeiro!")

	# Footer: horizontal rule followed by a centered credit line rendered as raw HTML
	footer_html = """
	<div style='text-align: center'>
	<small>Desenvolvido com ❤️ usando Streamlit e Transformers</small>
	</div>
	"""
	st.markdown("---")
	st.markdown(footer_html, unsafe_allow_html=True)