import gradio as gr
import torch

# Loaders, vector stores and embeddings live in the community/partner packages
# in recent LangChain releases.
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.memory import ConversationBufferMemory

from transformers import AutoTokenizer, pipeline
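
# Assumed dependency set (versions not pinned; adjust to your environment):
#   pip install gradio torch transformers accelerate bitsandbytes langchain \
#       langchain-community langchain-huggingface chromadb pypdf sentence-transformers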

# ===================================================================
# RADICAL HARDWARE CONFIGURATION
# ===================================================================
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.bfloat16 if DEVICE == "cuda" else torch.float32
MAX_MEMORY = "16GB" if DEVICE == "cpu" else None

# ===================================================================
# OPTIMIZED MODEL LIST
# ===================================================================
LLM_MODELS = {
    "TinyLlama-1.1B-Chat": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Phi-2": "microsoft/phi-2",
    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.2",
    "Zephyr-7B-Beta": "HuggingFaceH4/zephyr-7b-beta"
}
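
# Note: the 7B models above realistically need a GPU (ideally with 4-bit
# quantization via bitsandbytes); on CPU, stick to TinyLlama or Phi-2.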

# ===================================================================
# SYSTEM CORE
# ===================================================================
class DocumentProcessor:
    @staticmethod
    def load_and_split(files, chunk_size=512, chunk_overlap=64):
        """Carrega e processa documentos com paralelismo extremo"""
        try:
            loaders = [PyPDFLoader(file.name) for file in files]
            return [page for loader in loaders for page in loader.load_and_split(
                RecursiveCharacterTextSplitter(
                    chunk_size=chunk_size,
                    chunk_overlap=chunk_overlap,
                    separators=["\n\n", "\n", " ", ""]
                )
            )]
        except Exception as e:
            raise RuntimeError(f"FALHA CRÍTICA NO PROCESSAMENTO: {str(e)}")

class VectorDBManager:
    @staticmethod
    def create(splits):
        """Cria vetorização com aceleração de hardware"""
        return Chroma.from_documents(
            documents=splits,
            embedding=HuggingFaceEmbeddings(),
            persist_directory="./chroma_db"
        )
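
# Quick retrieval smoke test (hypothetical query, run after create()):
#   db = VectorDBManager.create(splits)
#   hits = db.similarity_search("What is the termination clause?", k=3)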

class LLMEngine:
    @staticmethod
    def initialize(model_name, temp=0.7, max_tokens=512):
        """Inicialização agressiva do modelo com otimizações de baixo nível"""
        try:
            tokenizer = AutoTokenizer.from_pretrained(LLM_MODELS[model_name])
            
            pipe = pipeline(
                "text-generation",
                model=LLM_MODELS[model_name],
                tokenizer=tokenizer,
                # With 4-bit quantization, accelerate places the weights itself;
                # otherwise pin the pipeline to the detected device.
                device_map="auto" if DEVICE == "cuda" else None,
                torch_dtype=TORCH_DTYPE,
                max_new_tokens=max_tokens,
                do_sample=True,
                top_k=50,
                # Guard: sampling requires a strictly positive temperature.
                temperature=max(float(temp), 0.01),
                # 4-bit loading assumes bitsandbytes is installed; drop if unavailable.
                model_kwargs={"load_in_4bit": True} if DEVICE == "cuda" else {}
            )
            
            return HuggingFacePipeline(pipeline=pipe)
        except KeyError:
            raise ValueError(f"UNSUPPORTED MODEL: {model_name}")
        except Exception as e:
            raise RuntimeError(f"NUCLEAR MODEL FAILURE: {str(e)}")

# ===================================================================
# COMBAT INTERFACE
# ===================================================================
def create_war_interface():
    with gr.Blocks(theme=gr.themes.Soft(), title="⚔️ PDF Assault v1.0") as warzone:
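        # Per-session state: the Chroma store ("db") and the retrieval chain ("llm").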
        state = gr.State({"db": None, "llm": None})
        
        # Upload zone
        with gr.Row(variant="panel"):
            file_upload = gr.Files(label="TARGET DOCUMENTS", file_types=[".pdf"])
            process_btn = gr.Button("ATTACK!", variant="stop")
        
        # Tactical controls
        with gr.Row(variant="compact"):
            model_selector = gr.Dropdown(list(LLM_MODELS.keys()), label="PRIMARY WEAPON", value="TinyLlama-1.1B-Chat")
            temp_slider = gr.Slider(0, 1, 0.7, label="FIREPOWER")
            deploy_btn = gr.Button("DEPLOY MODEL", variant="primary")
        
        # Battlefield
        chatbot = gr.Chatbot(height=600, label="OPERATIONS ZONE")
        msg_input = gr.Textbox(label="ATTACK COMMAND", placeholder="Enter the target...")
        
        # Log system
        combat_log = gr.Textbox(label="COMBAT REPORT", interactive=False)

        # ===== Military Operations =====
        @process_btn.click(inputs=[file_upload, state], outputs=[state, combat_log])
        def assault_documents(files, current_state):
            try:
                splits = DocumentProcessor.load_and_split(files)
                db = VectorDBManager.create(splits)
                return {"db": db, "llm": None}, "✅ DOCUMENTS CAPTURED!"
            except Exception as e:
                # Return the live state untouched instead of resetting it on failure.
                return current_state, f"☠️ CATACLYSMIC FAILURE: {str(e)}"

        @deploy_btn.click(inputs=[model_selector, temp_slider, state], outputs=[state, combat_log])
        def deploy_model(model, temp, current_state):
            try:
                if current_state["db"] is None:
                    return current_state, "⚠️ CAPTURE DOCUMENTS FIRST!"
                llm = LLMEngine.initialize(model, temp)
                current_state["llm"] = ConversationalRetrievalChain.from_llm(
                    llm=llm,
                    retriever=current_state["db"].as_retriever(),
                    memory=ConversationBufferMemory(
                        memory_key="chat_history",
                        # output_key is required when return_source_documents=True,
                        # otherwise the memory cannot tell which output to store.
                        output_key="answer",
                        return_messages=True
                    ),
                    return_source_documents=True
                )
                return current_state, f"🚀 {model} READY FOR COMBAT!"
            except Exception as e:
                return current_state, f"💥 DEPLOY FAILURE: {str(e)}"

        @msg_input.submit(inputs=[msg_input, chatbot, state], outputs=[msg_input, chatbot])
        def execute_combat(command, history, current_state):
            if not current_state["llm"]:
                return command, history + [(command, "⚠️ MODEL NOT DEPLOYED!")]
            
            try:
                # The chain's own memory tracks chat_history; only the question is passed.
                result = current_state["llm"].invoke({"question": command})
                response = f"🎯 ANSWER:\n{result['answer']}\n\n"
                response += "📌 INTEL:\n" + "\n".join(
                    f"Page {doc.metadata['page']+1}: {doc.page_content[:75]}..." 
                    for doc in result["source_documents"][:3]
                )
                return "", history + [(command, response)]
            except Exception as e:
                return command, history + [(command, f"☢️ CRITICAL FAILURE: {str(e)}")]

    return warzone

# ===================================================================
# SYSTEM INITIALIZATION
# ===================================================================
if __name__ == "__main__":
    interface = create_war_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        auth=("admin", "combat123"),
        show_error=True
    )
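
# Note: the ("admin", "combat123") auth pair above is a placeholder; change it
# (or load credentials from environment variables) before binding to 0.0.0.0.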