import gradio as gr
import os
import torch
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from transformers import AutoTokenizer, pipeline
# ===================================================================
# RADICAL HARDWARE CONFIGURATION
# ===================================================================
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32
MAX_MEMORY = "16GB" if DEVICE == "cpu" else None
# ===================================================================
# OPTIMIZED MODEL LIST
# ===================================================================
LLM_MODELS = {
    "TinyLlama-1.1B-Chat": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Phi-2": "microsoft/phi-2",
    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.2",
    "Zephyr-7B-Beta": "HuggingFaceH4/zephyr-7b-beta"
}
# ===================================================================
# SYSTEM CORE
# ===================================================================
class DocumentProcessor:
    @staticmethod
    def load_and_split(files, chunk_size=512, chunk_overlap=64):
        """Load the uploaded PDFs and split them into overlapping chunks."""
        try:
            loaders = [PyPDFLoader(file.name) for file in files]
            # Build the splitter once and reuse it for every loader
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                separators=["\n\n", "\n", " ", ""]
            )
            return [page for loader in loaders
                    for page in loader.load_and_split(splitter)]
        except Exception as e:
            raise RuntimeError(f"CRITICAL FAILURE DURING PROCESSING: {str(e)}")
class VectorDBManager:
    @staticmethod
    def create(splits):
        """Build a persistent Chroma vector store from the document splits."""
        return Chroma.from_documents(
            documents=splits,
            embedding=HuggingFaceEmbeddings(),
            persist_directory="./chroma_db"
        )
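# Note: HuggingFaceEmbeddings() with no arguments falls back to its default
# model, "sentence-transformers/all-mpnet-base-v2". On a CPU-bound Space a
# lighter model can be swapped in through the same parameter, e.g.:
#
#   HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")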
class LLMEngine:
    @staticmethod
    def initialize(model_name, temp=0.7, max_tokens=512):
        """Initialize the model aggressively, with low-level optimizations."""
        try:
            tokenizer = AutoTokenizer.from_pretrained(LLM_MODELS[model_name])
            pipe = pipeline(
                "text-generation",
                model=LLM_MODELS[model_name],
                tokenizer=tokenizer,
                device=DEVICE,
                torch_dtype=TORCH_DTYPE,
                max_new_tokens=max_tokens,
                do_sample=True,
                top_k=50,
                temperature=temp,
                # 4-bit loading requires the bitsandbytes package and a CUDA GPU
                model_kwargs={"load_in_4bit": True} if DEVICE == "cuda" else {}
            )
            return HuggingFacePipeline(pipeline=pipe)
        except KeyError:
            raise ValueError("UNSUPPORTED MODEL!")
        except Exception as e:
            raise RuntimeError(f"NUCLEAR MODEL FAILURE: {str(e)}")
# ===================================================================
# COMBAT INTERFACE
# ===================================================================
def create_war_interface():
    with gr.Blocks(theme=gr.themes.Soft(), title="⚔️ PDF Assault v1.0") as warzone:
        state = gr.State({"db": None, "llm": None})

        # Upload zone
        with gr.Row(variant="panel"):
            file_upload = gr.Files(label="TARGET DOCUMENTS", file_types=[".pdf"])
            process_btn = gr.Button("ATTACK!", variant="stop")

        # Tactical controls
        with gr.Row(variant="compact"):
            model_selector = gr.Dropdown(list(LLM_MODELS.keys()), label="PRIMARY WEAPON", value="TinyLlama-1.1B-Chat")
            temp_slider = gr.Slider(0, 1, 0.7, label="FIREPOWER")
            deploy_btn = gr.Button("DEPLOY MODEL", variant="primary")

        # Battlefield
        chatbot = gr.Chatbot(height=600, label="OPERATIONS ZONE")
        msg_input = gr.Textbox(label="ATTACK COMMAND", placeholder="Enter the target...")

        # Log system
        combat_log = gr.Textbox(label="COMBAT REPORT", interactive=False)
        # ===== Military Operations =====
        def assault_documents(files):
            try:
                splits = DocumentProcessor.load_and_split(files)
                db = VectorDBManager.create(splits)
                return {"db": db, "llm": None}, "✅ DOCUMENTS CAPTURED!"
            except Exception as e:
                # Reset to an empty state instead of reading the component's .value
                return {"db": None, "llm": None}, f"☠️ CATACLYSMIC FAILURE: {str(e)}"
        def deploy_model(model, temp, current_state):
            try:
                llm = LLMEngine.initialize(model, temp)
                current_state["llm"] = ConversationalRetrievalChain.from_llm(
                    llm=llm,
                    retriever=current_state["db"].as_retriever(),
                    memory=ConversationBufferMemory(
                        memory_key="chat_history",
                        return_messages=True,
                        output_key="answer"  # required when return_source_documents=True
                    ),
                    return_source_documents=True
                )
                return current_state, f"🚀 {model} READY FOR COMBAT!"
            except Exception as e:
                return current_state, f"💥 DEPLOY FAILURE: {str(e)}"
        def execute_combat(command, history, state):
            if not state["llm"]:
                return command, history + [(command, "⚠️ MODEL NOT DEPLOYED!")]
            try:
                # The attached memory supplies chat_history, so only the question is passed
                result = state["llm"]({"question": command})
                response = f"🎯 ANSWER:\n{result['answer']}\n\n"
                response += "📌 INTEL:\n" + "\n".join(
                    f"Page {doc.metadata['page'] + 1}: {doc.page_content[:75]}..."
                    for doc in result["source_documents"][:3]
                )
                return "", history + [(command, response)]
            except Exception as e:
                return command, history + [(command, f"☢️ CRITICAL FAILURE: {str(e)}")]
        # ===== Event Bindings =====
        process_btn.click(assault_documents, inputs=[file_upload], outputs=[state, combat_log])
        deploy_btn.click(deploy_model, inputs=[model_selector, temp_slider, state], outputs=[state, combat_log])
        msg_input.submit(execute_combat, inputs=[msg_input, chatbot, state], outputs=[msg_input, chatbot])

    return warzone
# ===================================================================
# SYSTEM INITIALIZATION
# ===================================================================
if __name__ == "__main__":
    interface = create_war_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        auth=("admin", "combat123"),
        show_error=True
    )
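# -------------------------------------------------------------------
# A plausible requirements.txt for this Space, derived from the imports
# above; pinned versions are omitted because none are given in the source:
#
#   gradio
#   torch
#   transformers
#   langchain
#   langchain-huggingface
#   chromadb
#   pypdf
#   sentence-transformers
#   accelerate
#   bitsandbytes  # only needed for the 4-bit path on CUDA
# -------------------------------------------------------------------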