Spaces:

gufett0
/

chatbot-llamaindex

Sleeping

App Files Files Community

gufett0 committed on Sep 13, 2024

Commit

643e1b9

1 Parent(s): 57ae88a

first app files

Browse files

Files changed (6) hide show

.gitignore +1 -0
app.py +15 -0
backend.py +87 -0
data/blockchainprova.txt +0 -0
interface.py +44 -0
requirements.txt +10 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ /myenv

app.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from backend import handle_query
+import gradio as gr
+iface = gr.ChatInterface(
+    fn=handle_query,
+    title="PDF Information and Inference",
+    description="Retrieval-Augmented Generation - Ask me anything about the content of the PDF.",
+    #examples=["What is the main topic of the document?", "Can you summarize the key points?"],
+    #cache_examples=True,
+)
+if __name__ == "__main__":
+    iface.launch()

backend.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import torch
+import os
+from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
+from interface import GemmaLLMInterface
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.embeddings.instructor import InstructorEmbedding
+import gradio as gr
+from llama_index.core import ChatPromptTemplate
+from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader, PromptTemplate, load_index_from_storage
+from llama_index.core.node_parser import SentenceSplitter
+model_id = "google/gemma-2-2b-it"
+tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype= torch.float16 if torch.cuda.is_available() else torch.float32,
+)
+# what models will be used by LlamaIndex:
+Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
+Settings.llm  = GemmaLLMInterface(model=model, tokenizer=tokenizer)
+"""os.environ["KAGGLE_USERNAME"] = "middi0"
+os.environ["KAGGLE_KEY"] = "b7eed1ea5cfb30e8eb13b085af2e427b"
+# Let's load Gemma using Keras
+gemma_model_id = "gemma2_instruct_2b_en"
+gemma = keras_nlp.models.GemmaCausalLM.from_preset(gemma_model_id)
+# This settings define what models will be used by LlamaIndex
+Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
+Settings.llm = GemmaLLMInterface(model=gemma)"""
+############################---------------------------------
+# CHUNKING
+# Reading documents from disk
+documents = SimpleDirectoryReader(input_files=["data/blockchainprova.txt"]).load_data()
+# Splitting the document into chunks with
+# predefined size and overlap
+parser = SentenceSplitter.from_defaults(
+    chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n"
+)
+nodes = parser.get_nodes_from_documents(documents)
+#print(nodes[6].text)
+# BUILD A VECTOR STORE
+index = VectorStoreIndex(nodes)
+def handle_query(query_str, chathistory):
+    qa_prompt_str = (
+      "Context information is below.\n"
+      "---------------------\n"
+      "{context_str}\n"
+      "---------------------\n"
+      "Given the context information and not prior knowledge, "
+      "answer the question: {query_str}\n"
+    )
+  # Text QA Prompt
+    chat_text_qa_msgs = [
+      (
+          "system",
+          "Sei un assistente italiano di nome Tizio che risponde solo alle domande o richieste pertinenti. ",
+      ),
+      ("user", qa_prompt_str),
+    ]
+    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+    index = VectorStoreIndex(nodes)
+    result = index.as_query_engine(text_qa_template=text_qa_template).query(query_str)
+    response_text = result.response
+    # Remove any unwanted tokens like <end_of_turn>
+    cleaned_result = response_text.replace("<end_of_turn>", "").strip()
+    yield cleaned_result

data/blockchainprova.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

interface.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from transformers import AutoTokenizer, AutoModelForCausalLM
+from llama_index.core.llms import  CustomLLM, LLMMetadata, CompletionResponse, CompletionResponseGen
+from llama_index.core.llms.callbacks import llm_completion_callback
+from typing import Any
+class GemmaLLMInterface(CustomLLM):
+    model: Any
+    tokenizer: Any
+    context_window: int = 8192
+    num_output: int = 2048
+    model_name: str = "gemma_2"
+    class Config:
+        protected_namespaces = ()
+    def _format_prompt(self, message: str) -> str:
+        return (
+            f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
+        )
+    @property
+    def metadata(self) -> LLMMetadata:
+        #Get LLM metadata.
+        return LLMMetadata(
+            context_window=self.context_window,
+            num_output=self.num_output,
+            model_name=self.model_name,
+        )
+    @llm_completion_callback()
+    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
+        prompt = self._format_prompt(prompt)
+        inputs = self.tokenizer(prompt, return_tensors="pt")
+        output = self.model.generate(**inputs, max_length=self.num_output)
+        raw_response = self.tokenizer.decode(output[0], skip_special_tokens=True)
+        response = raw_response[len(prompt):]
+        return CompletionResponse(text=response)
+    @llm_completion_callback()
+    def stream_complete(self, prompt: str, **kwargs: any) -> CompletionResponseGen:
+        response = self.complete(prompt).text
+        for token in response:
+            yield CompletionResponse(text=token)

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+python-dotenv
+llama-index
+llama-index-embeddings-huggingface
+llama-index-llms-huggingface
+llama-index-embeddings-instructor
+sentence-transformers==2.2.2
+llama-index-readers-web
+llama-index-readers-file
+gradio
+transformers