Leri777 committed on
Commit b78e9ba · verified · 1 Parent(s): 4df36c7

Update app.py

Files changed (1)
  1. app.py +11 -11
app.py CHANGED
@@ -3,7 +3,7 @@ import logging
 from logging.handlers import RotatingFileHandler
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import AutoModelForCausalLM, GemmaTokenizerFast, pipeline
 from langchain_huggingface import HuggingFacePipeline
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
@@ -19,22 +19,23 @@ logger.addHandler(file_handler)
 logger.debug("Application started")
 
 model_id = "google/gemma-2-9b-it"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
 
 # Load model with GPU availability check
 if torch.cuda.is_available():
     logger.debug("GPU is available. Proceeding with GPU setup.")
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto",
+        device_map="auto",
         torch_dtype=torch.bfloat16,
     )
 else:
     logger.warning("GPU is not available. Proceeding with CPU setup.")
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        device_map="cpu",
         low_cpu_mem_usage=True,
-        use_auth_token=os.getenv('HF_TOKEN'),
+        token=os.getenv('HF_TOKEN'),
     )
 
 model.eval()
@@ -54,8 +55,6 @@ pipe = pipeline(
 # Initialize HuggingFacePipeline model for LangChain
 chat_model = HuggingFacePipeline(pipeline=pipe)
 
-logger.debug("Model and tokenizer loaded successfully")
-
 # Define the conversation template for LangChain
 template = """<|im_start|>system
 {system_prompt}
@@ -70,12 +69,12 @@ template = """<|im_start|>system
 prompt = PromptTemplate(
     template=template, input_variables=["system_prompt", "history", "human_input"]
 )
-chain = LLMChain(llm=chat_model, prompt=prompt)
+chain = prompt | chat_model
 
 # Prediction function using LangChain and model
-def predict(message, history=[]):
+def predict(message, chat_history=[]):
     formatted_history = "\n".join(
-        [f"<|im_start|>{entry['role']}\n{entry['content']}<|im_end|>" for entry in history]
+        [f"<|im_start|>{entry['role']}\n{entry['content']}<|im_end|>" for entry in chat_history]
     )
     system_prompt = "You are a helpful coding assistant."
 
@@ -93,9 +92,10 @@ def predict(message, history=[]):
 # Gradio UI
 interface = gr.Interface(
     fn=predict,
-    inputs=gr.Textbox(label="User input"),
+    inputs=[
+        gr.Textbox(label="User input")
+    ],
     outputs="text",
-    allow_flagging='never',
     live=True,
 )
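The substantive change is the move from the LLMChain wrapper to LangChain's runnable composition: `chain = prompt | chat_model` builds a runnable that is called with one dict of the template's input variables via `.invoke()`, instead of `chain.run()`/`chain.predict()`. Below is a minimal sketch of how the updated `predict` would drive that chain; it assumes the `prompt`, `chat_model`, and `chain` objects defined earlier in app.py, and that each history entry is a dict with 'role' and 'content' keys, as in the diff.

    # Sketch only: assumes `chain = prompt | chat_model` and the <|im_start|> template from app.py above.
    def predict(message, chat_history=[]):
        # Render each history entry into the <|im_start|>...<|im_end|> blocks the template expects.
        formatted_history = "\n".join(
            f"<|im_start|>{entry['role']}\n{entry['content']}<|im_end|>" for entry in chat_history
        )
        # The piped runnable takes the prompt variables as a single dict; with
        # HuggingFacePipeline the invocation returns the model output as a string.
        return chain.invoke(
            {
                "system_prompt": "You are a helpful coding assistant.",
                "history": formatted_history,
                "human_input": message,
            }
        )

For example, predict("Explain list comprehensions", [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]) fills the template with the two formatted history turns before the new message.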