# pdfchatbot / app.py
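"""Gradio RAG chatbot over uploaded PDFs.

Pipeline: PDFs are chunked, embedded into a Chroma vector store, and queried
through a ConversationalRetrievalChain backed by a local Hugging Face
text-generation pipeline, all served from a Gradio Blocks interface.
"""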
import gradio as gr
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from transformers import AutoTokenizer, pipeline
# ===================================================================
# RADICAL HARDWARE CONFIGURATION
# ===================================================================
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32
MAX_MEMORY = "16GB" if DEVICE == "cpu" else None  # reserved for CPU memory limits; currently unused
# ===================================================================
# LIST OF OPTIMIZED MODELS
# ===================================================================
LLM_MODELS = {
    "TinyLlama-1.1B-Chat": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Phi-2": "microsoft/phi-2",
    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.2",
    "Zephyr-7B-Beta": "HuggingFaceH4/zephyr-7b-beta",
}
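# Rough footprints (assumption, not measured here): TinyLlama and Phi-2 run on
# CPU or modest GPUs; the 7B models realistically need a GPU, where the 4-bit
# path below brings them down to roughly 4-5 GB of VRAM.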
# ===================================================================
# SYSTEM CORE
# ===================================================================
class DocumentProcessor:
    @staticmethod
    def load_and_split(files, chunk_size=512, chunk_overlap=64):
        """Load the uploaded PDFs and split them into overlapping chunks."""
        try:
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                separators=["\n\n", "\n", " ", ""],
            )
            loaders = [PyPDFLoader(file.name) for file in files]
            return [chunk for loader in loaders for chunk in loader.load_and_split(splitter)]
        except Exception as e:
            raise RuntimeError(f"CRITICAL FAILURE WHILE PROCESSING DOCUMENTS: {str(e)}")
class VectorDBManager:
    @staticmethod
    def create(splits):
        """Embed the document chunks and persist them in a Chroma vector store."""
        return Chroma.from_documents(
            documents=splits,
            embedding=HuggingFaceEmbeddings(),
            persist_directory="./chroma_db",
        )
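# HuggingFaceEmbeddings() falls back to its default sentence-transformers model
# when none is specified, and persist_directory keeps the index on disk, so
# chunks from successive uploads accumulate in ./chroma_db for the life of the
# container.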
class LLMEngine:
    @staticmethod
    def initialize(model_name, temp=0.7, max_tokens=512):
        """Aggressive model initialization with low-level optimizations."""
        try:
            model_id = LLM_MODELS[model_name]
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            # 4-bit quantization (via bitsandbytes) manages device placement
            # itself, so `device` must not be passed alongside `load_in_4bit`.
            device_kwargs = (
                {"model_kwargs": {"load_in_4bit": True}}
                if DEVICE == "cuda"
                else {"device": DEVICE}
            )
            pipe = pipeline(
                "text-generation",
                model=model_id,
                tokenizer=tokenizer,
                torch_dtype=TORCH_DTYPE,
                max_new_tokens=max_tokens,
                do_sample=True,
                top_k=50,
                temperature=temp,
                **device_kwargs,
            )
            return HuggingFacePipeline(pipeline=pipe)
        except KeyError:
            raise ValueError(f"UNSUPPORTED MODEL: {model_name}")
        except Exception as e:
            raise RuntimeError(f"NUCLEAR FAILURE IN THE MODEL: {str(e)}")
# ===================================================================
# COMBAT INTERFACE
# ===================================================================
def create_war_interface():
    with gr.Blocks(theme=gr.themes.Soft(), title="⚔️ PDF Assault v1.0") as warzone:
        state = gr.State({"db": None, "llm": None})
        # Upload zone
        with gr.Row(variant="panel"):
            file_upload = gr.Files(label="TARGET DOCUMENTS", file_types=[".pdf"])
            process_btn = gr.Button("ATTACK!", variant="stop")
        # Tactical controls
        with gr.Row(variant="compact"):
            model_selector = gr.Dropdown(list(LLM_MODELS.keys()), label="PRIMARY WEAPON", value="TinyLlama-1.1B-Chat")
            temp_slider = gr.Slider(0, 1, 0.7, label="FIREPOWER")
            deploy_btn = gr.Button("DEPLOY MODEL", variant="primary")
        # Battlefield
        chatbot = gr.Chatbot(height=600, label="OPERATIONS ZONE")
        msg_input = gr.Textbox(label="ATTACK COMMAND", placeholder="Enter the target...")
        # Log system
        combat_log = gr.Textbox(label="COMBAT REPORT", interactive=False)
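        # The chat history is kept in Gradio's legacy list-of-(user, bot)
        # tuples format; newer Gradio releases prefer type="messages", but the
        # handlers below assume tuples.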
        # ===== Military Operations =====
        @process_btn.click(inputs=[file_upload, state], outputs=[state, combat_log])
        def assault_documents(files, current_state):
            try:
                splits = DocumentProcessor.load_and_split(files)
                db = VectorDBManager.create(splits)
                return {"db": db, "llm": None}, "✅ DOCUMENTS CAPTURED!"
            except Exception as e:
                # On failure, return this session's state unchanged.
                return current_state, f"☠️ CATACLYSMIC FAILURE: {str(e)}"
        @deploy_btn.click(inputs=[model_selector, temp_slider, state], outputs=[state, combat_log])
        def deploy_model(model, temp, current_state):
            try:
                if current_state["db"] is None:
                    return current_state, "⚠️ CAPTURE DOCUMENTS BEFORE DEPLOYING!"
                llm = LLMEngine.initialize(model, temp)
                current_state["llm"] = ConversationalRetrievalChain.from_llm(
                    llm=llm,
                    retriever=current_state["db"].as_retriever(),
                    # output_key="answer" is required when the chain also
                    # returns source documents; otherwise the memory cannot
                    # tell which output to store.
                    memory=ConversationBufferMemory(
                        memory_key="chat_history",
                        output_key="answer",
                        return_messages=True,
                    ),
                    return_source_documents=True,
                )
                return current_state, f"🚀 {model} READY FOR COMBAT!"
            except Exception as e:
                return current_state, f"💥 DEPLOY FAILURE: {str(e)}"
        @msg_input.submit(inputs=[msg_input, chatbot, state], outputs=[msg_input, chatbot])
        def execute_combat(command, history, state):
            if not state["llm"]:
                return command, history + [(command, "⚠️ MODEL NOT DEPLOYED!")]
            try:
                # The chain's memory tracks the running conversation, so only
                # the new question needs to be passed in.
                result = state["llm"].invoke({"question": command})
                response = f"🎯 ANSWER:\n{result['answer']}\n\n"
                response += "📌 INTEL:\n" + "\n".join(
                    f"Page {doc.metadata['page'] + 1}: {doc.page_content[:75]}..."
                    for doc in result["source_documents"][:3]
                )
                return "", history + [(command, response)]
            except Exception as e:
                return command, history + [(command, f"☢️ CRITICAL FAILURE: {str(e)}")]
    return warzone
# ===================================================================
# SYSTEM INITIALIZATION
# ===================================================================
if __name__ == "__main__":
    interface = create_war_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        auth=("admin", "combat123"),  # demo credentials; swap for secrets before real deployment
        show_error=True,
    )