Spaces: changed class interface with iterator

Files changed:
- backend.py   +2 -2
- interface.py  +9 -7
backend.py
CHANGED
@@ -20,7 +20,7 @@ login(huggingface_token)
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-
+model_id = "google/gemma-2-2b-it"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
@@ -28,7 +28,7 @@ model = AutoModelForCausalLM.from_pretrained(
     token=True)
 
 model.tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
-model.eval()
+model.eval()
 
 # what models will be used by LlamaIndex:
 Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
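For orientation, backend.py's model setup after this commit reads roughly as follows. This is a hedged sketch, not the verbatim file: the imports, the token plumbing around login(), and line 27 of the from_pretrained call (which the diff does not show) are assumptions filled in from the visible hunk headers and from interface.py.

    # Sketch of backend.py's model setup after this commit; imports and the
    # token plumbing are assumptions, the rest follows the diff.
    import os
    import torch
    from huggingface_hub import login
    from transformers import AutoModelForCausalLM, AutoTokenizer

    huggingface_token = os.environ.get("HF_TOKEN")  # assumed: Space secret
    login(huggingface_token)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model_id = "google/gemma-2-2b-it"
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        # Line 27 is hidden between the two hunks; a torch_dtype kwarg matching
        # interface.py is a plausible guess for what sits there.
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        token=True)

    # Note the asymmetry the commit leaves in place: the model now comes from
    # the Gemma 2 repo, while the tokenizer still loads "google/gemma-2b-it".
    model.tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
    model.eval()  # switch to inference mode (disables dropout)

If the tokenizer mismatch is unintended, pointing AutoTokenizer.from_pretrained at model_id, as interface.py now does, would keep the pair in sync.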
interface.py
CHANGED
@@ -11,15 +11,17 @@ from pydantic import Field, field_validator
 class GemmaLLMInterface(CustomLLM):
     def __init__(self, model_id: str = "google/gemma-2-2b-it", **kwargs):
         super().__init__(**kwargs)
-        object.__setattr__(self, "model_id", model_id) #
-
-
+        object.__setattr__(self, "model_id", model_id)  # Bypass Pydantic for model_id
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
             device_map="auto",
             torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
         )
-
-        self
-        self
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        object.__setattr__(self, "model", model)  # Bypass Pydantic for model
+        object.__setattr__(self, "tokenizer", tokenizer)  # Bypass Pydantic for tokenizer
+        object.__setattr__(self, "context_window", 8192)
+        object.__setattr__(self, "num_output", 2048)
 
     def _format_prompt(self, message: str) -> str:
         return (
@@ -32,7 +34,7 @@ class GemmaLLMInterface(CustomLLM):
         return LLMMetadata(
             context_window=self.context_window,
             num_output=self.num_output,
-            model_name=self.model_id, #
+            model_name=self.model_id,  # Returning the correct model ID
         )
 
 
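The object.__setattr__ calls are the heart of this change: CustomLLM is a Pydantic model, so a plain "self.model = model" on an attribute the class never declared raises a validation error, which is what the removed "self…" assignments were running into. object.__setattr__ writes straight into the instance and skips Pydantic's __setattr__ hook. Below is a minimal self-contained sketch of the class this diff converges on; the complete/stream_complete bodies, the prompt template, and the generation arguments are illustrative assumptions, not part of the commit.

    # Hedged end-to-end sketch of the pattern this commit lands on; method
    # bodies beyond __init__/metadata are assumptions for illustration.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.core.llms import (
        CustomLLM,
        CompletionResponse,
        CompletionResponseGen,
        LLMMetadata,
    )
    from llama_index.core.llms.callbacks import llm_completion_callback


    class GemmaLLMInterface(CustomLLM):
        def __init__(self, model_id: str = "google/gemma-2-2b-it", **kwargs):
            super().__init__(**kwargs)
            # CustomLLM is a Pydantic model: undeclared attributes cannot be
            # set with "self.x = ...", so bypass Pydantic's __setattr__ hook.
            object.__setattr__(self, "model_id", model_id)
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                device_map="auto",
                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
            )
            model.eval()
            object.__setattr__(self, "model", model)
            object.__setattr__(self, "tokenizer", AutoTokenizer.from_pretrained(model_id))
            object.__setattr__(self, "context_window", 8192)
            object.__setattr__(self, "num_output", 2048)

        def _format_prompt(self, message: str) -> str:
            # Assumed Gemma chat template for a single user turn.
            return f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"

        @property
        def metadata(self) -> LLMMetadata:
            return LLMMetadata(
                context_window=self.context_window,
                num_output=self.num_output,
                model_name=self.model_id,
            )

        @llm_completion_callback()
        def complete(self, prompt: str, **kwargs) -> CompletionResponse:
            inputs = self.tokenizer(
                self._format_prompt(prompt), return_tensors="pt"
            ).to(self.model.device)
            with torch.no_grad():
                out = self.model.generate(**inputs, max_new_tokens=self.num_output)
            text = self.tokenizer.decode(
                out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
            )
            return CompletionResponse(text=text)

        @llm_completion_callback()
        def stream_complete(self, prompt: str, **kwargs) -> CompletionResponseGen:
            # Minimal streaming shim: yields the blocking completion as one
            # chunk; a real implementation would use TextIteratorStreamer.
            full = self.complete(prompt, **kwargs).text

            def gen() -> CompletionResponseGen:
                yield CompletionResponse(text=full, delta=full)

            return gen()

Stashing state via object.__setattr__ works, but an alternative worth noting is declaring the attributes on the class with Pydantic's PrivateAttr (or as proper fields), so that Pydantic knows about them and ordinary assignment validates as expected.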