import gradio as gr
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from transformers import AutoTokenizer, pipeline

# ===================================================================
# RADICAL HARDWARE CONFIGURATION
# ===================================================================
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32
MAX_MEMORY = "16GB" if DEVICE == "cpu" else None  # currently unused

# ===================================================================
# OPTIMIZED MODEL LIST
# ===================================================================
LLM_MODELS = {
    "TinyLlama-1.1B-Chat": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Phi-2": "microsoft/phi-2",
    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.2",
    "Zephyr-7B-Beta": "HuggingFaceH4/zephyr-7b-beta",
}

# ===================================================================
# SYSTEM CORE
# ===================================================================
class DocumentProcessor:
    @staticmethod
    def load_and_split(files, chunk_size=512, chunk_overlap=64):
        """Load PDFs and split them into overlapping character chunks."""
        try:
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                separators=["\n\n", "\n", " ", ""],
            )
            loaders = [PyPDFLoader(file.name) for file in files]
            return [
                chunk
                for loader in loaders
                for chunk in loader.load_and_split(splitter)
            ]
        except Exception as e:
            raise RuntimeError(f"CRITICAL PROCESSING FAILURE: {e}")


class VectorDBManager:
    @staticmethod
    def create(splits):
        """Build a persistent Chroma vector store from the document chunks."""
        return Chroma.from_documents(
            documents=splits,
            embedding=HuggingFaceEmbeddings(),
            persist_directory="./chroma_db",
        )


class LLMEngine:
    @staticmethod
    def initialize(model_name, temp=0.7, max_tokens=512):
        """Build a text-generation pipeline with device-appropriate settings."""
        try:
            model_id = LLM_MODELS[model_name]
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            gen_kwargs = dict(
                task="text-generation",
                model=model_id,
                tokenizer=tokenizer,
                torch_dtype=TORCH_DTYPE,
                max_new_tokens=max_tokens,
                do_sample=True,
                top_k=50,
                temperature=temp,
            )
            if DEVICE == "cuda":
                # 4-bit quantization needs bitsandbytes; accelerate places the
                # quantized weights, so the pipeline must not also pin a device.
                gen_kwargs["model_kwargs"] = {"load_in_4bit": True, "device_map": "auto"}
            else:
                gen_kwargs["device"] = DEVICE
            return HuggingFacePipeline(pipeline=pipeline(**gen_kwargs))
        except KeyError:
            raise ValueError(f"UNSUPPORTED MODEL: {model_name}")
        except Exception as e:
            raise RuntimeError(f"NUCLEAR MODEL FAILURE: {e}")

# ===================================================================
# COMBAT INTERFACE
# ===================================================================
def create_war_interface():
    with gr.Blocks(theme=gr.themes.Soft(), title="⚔️ PDF Assault v1.0") as warzone:
        state = gr.State({"db": None, "llm": None})

        # Upload zone
        with gr.Row(variant="panel"):
            file_upload = gr.Files(label="TARGET DOCUMENTS", file_types=[".pdf"])
            process_btn = gr.Button("ATTACK!", variant="stop")

        # Tactical controls
        with gr.Row(variant="compact"):
            model_selector = gr.Dropdown(
                list(LLM_MODELS.keys()),
                label="PRIMARY WEAPON",
                value="TinyLlama-1.1B-Chat",
            )
            temp_slider = gr.Slider(0, 1, 0.7, label="FIREPOWER")
            deploy_btn = gr.Button("DEPLOY MODEL", variant="primary")

        # Battlefield
        chatbot = gr.Chatbot(height=600, label="OPERATIONS ZONE")
        msg_input = gr.Textbox(label="ATTACK COMMAND", placeholder="Enter the target...")

        # Log system
        combat_log = gr.Textbox(label="COMBAT REPORT", interactive=False)

        # ===== Military operations =====
        @process_btn.click(inputs=[file_upload, state], outputs=[state, combat_log])
        def assault_documents(files, current_state):
            try:
                splits = DocumentProcessor.load_and_split(files)
                db = VectorDBManager.create(splits)
                return {"db": db, "llm": None}, "✅ DOCUMENTS CAPTURED!"
            except Exception as e:
                return current_state, f"☠️ CATACLYSMIC FAILURE: {e}"

        @deploy_btn.click(inputs=[model_selector, temp_slider, state],
                          outputs=[state, combat_log])
        def deploy_model(model, temp, current_state):
            if current_state["db"] is None:
                return current_state, "⚠️ CAPTURE DOCUMENTS BEFORE DEPLOYING!"
            try:
                llm = LLMEngine.initialize(model, temp)
                current_state["llm"] = ConversationalRetrievalChain.from_llm(
                    llm=llm,
                    retriever=current_state["db"].as_retriever(),
                    # output_key tells the memory which of the two chain outputs
                    # (answer, source_documents) to record.
                    memory=ConversationBufferMemory(
                        memory_key="chat_history",
                        return_messages=True,
                        output_key="answer",
                    ),
                    return_source_documents=True,
                )
                return current_state, f"🚀 {model} READY FOR COMBAT!"
            except Exception as e:
                return current_state, f"💥 DEPLOY FAILURE: {e}"

        @msg_input.submit(inputs=[msg_input, chatbot, state],
                          outputs=[msg_input, chatbot])
        def execute_combat(command, history, current_state):
            if not current_state["llm"]:
                return command, history + [(command, "⚠️ MODEL NOT DEPLOYED!")]
            try:
                # The attached memory supplies chat_history, so only the
                # question is passed in.
                result = current_state["llm"].invoke({"question": command})
                response = f"🎯 ANSWER:\n{result['answer']}\n\n"
                response += "📌 INTEL:\n" + "\n".join(
                    f"Page {doc.metadata['page'] + 1}: {doc.page_content[:75]}..."
                    for doc in result["source_documents"][:3]
                )
                return "", history + [(command, response)]
            except Exception as e:
                return command, history + [(command, f"☢️ CRITICAL FAILURE: {e}")]

    return warzone

# ===================================================================
# SYSTEM STARTUP
# ===================================================================
if __name__ == "__main__":
    interface = create_war_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        auth=("admin", "combat123"),  # change these credentials before exposing
        show_error=True,
    )
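
# -------------------------------------------------------------------
# Quick-start sketch. Assumptions: the PyPI package names below match
# the imports above, and "report.pdf" stands in for any local PDF;
# nothing here is executed by this module.
#
#   pip install gradio torch transformers accelerate bitsandbytes \
#       langchain langchain-community langchain-huggingface \
#       langchain-text-splitters chromadb pypdf sentence-transformers
#
#   # Headless use of the core classes, bypassing the Gradio UI:
#   from types import SimpleNamespace
#   files = [SimpleNamespace(name="report.pdf")]  # mimics gr.Files items
#   splits = DocumentProcessor.load_and_split(files)
#   db = VectorDBManager.create(splits)
#   llm = LLMEngine.initialize("TinyLlama-1.1B-Chat", temp=0.3)
#
# Run the app itself with `python <this file>` and open
# http://localhost:7860 (login: admin / combat123).
# -------------------------------------------------------------------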