Leri777 committed
Commit 806020c
1 Parent(s): 7f13eee

Update app.py

Files changed (1):
  1. app.py +43 -21

app.py CHANGED
@@ -1,11 +1,15 @@
 import os
 import logging
+import time
+import random
 from logging.handlers import RotatingFileHandler
 import gradio as gr
 import torch
+from accelerate import Accelerator
 from transformers import AutoModelForCausalLM, GemmaTokenizerFast, pipeline
 from langchain_huggingface import HuggingFacePipeline
 from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain

 # Logging setup
 log_file = '/tmp/app_debug.log'
@@ -20,24 +24,31 @@ logger.debug("Application started")
 model_id = "google/gemma-2-9b-it"
 tokenizer = GemmaTokenizerFast.from_pretrained(model_id)

-# Load model with GPU availability check
-if torch.cuda.is_available():
-    logger.debug("GPU is available. Proceeding with GPU setup.")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        torch_dtype=torch.bfloat16,
-    )
-else:
-    logger.warning("GPU is not available. Proceeding with CPU setup.")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        low_cpu_mem_usage=True,
-        token=os.getenv('HF_TOKEN'),
-    )
+# Function to load model with GPU availability check
+def load_model():
+    if torch.cuda.is_available():
+        logger.debug("GPU is available. Proceeding with GPU setup.")
+        return AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto", torch_dtype=torch.bfloat16,
+        )
+    else:
+        logger.warning("GPU is not available. Proceeding with CPU setup.")
+        return AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto", low_cpu_mem_usage=True, token=os.getenv('HF_TOKEN'),
+        )

-model.eval()
+# Retry logic to load model with random delay
+model = None
+while model is None:
+    try:
+        model = load_model()
+        model.eval()
+    except Exception as e:
+        retry_delay = random.uniform(10, 30)  # Random delay between 10 to 30 seconds
+        logger.error(f"Failed to load model: {e}. Retrying in {retry_delay:.2f} seconds...")
+        time.sleep(retry_delay)

 # Create Hugging Face pipeline
 pipe = pipeline(
@@ -91,11 +102,22 @@ def predict(message, chat_history=[]):
 # Gradio UI
 interface = gr.Interface(
     fn=predict,
-    inputs=gr.Textbox(label="User input"),
-    outputs="text",
+    inputs=[
+        gr.Textbox(label="User input"),
+        gr.State(),
+    ],
+    outputs="text", allow_flagging='never',
     live=True,
 )

-interface.launch()
+# Retry logic to launch interface with random delay
+while True:
+    try:
+        interface.launch()
+        break
+    except Exception as e:
+        retry_delay = random.uniform(10, 30)  # Random delay between 10 to 30 seconds
+        logger.error(f"Failed to launch interface: {e}. Retrying in {retry_delay:.2f} seconds...")
+        time.sleep(retry_delay)

-logger.debug("Chat interface initialized and launched")
+logger.debug("Chat interface initialized and launched")
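
The two retry loops added in this commit share the same shape: call, catch, sleep a random 10-30 s, try again. A minimal sketch of how that pattern could be factored into a single helper; this refactor is not part of the commit, and retry_with_jitter and max_attempts are hypothetical names:

import logging
import random
import time

logger = logging.getLogger(__name__)

def retry_with_jitter(fn, min_delay=10, max_delay=30, max_attempts=None):
    """Call fn() until it succeeds, sleeping a random delay between failures."""
    attempt = 0
    while True:
        attempt += 1
        try:
            return fn()
        except Exception as e:
            # None = retry forever, matching the commit's behaviour;
            # a finite max_attempts re-raises once exhausted.
            if max_attempts is not None and attempt >= max_attempts:
                raise
            delay = random.uniform(min_delay, max_delay)
            logger.error(f"Attempt {attempt} failed: {e}. Retrying in {delay:.2f} seconds...")
            time.sleep(delay)

# Usage mirroring the commit:
#   model = retry_with_jitter(load_model)
#   model.eval()
#   retry_with_jitter(interface.launch)

Bounding attempts is worth considering here: a bare except Exception around interface.launch() retries forever even on non-transient failures, such as a port that is already in use.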
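
The interface change adds gr.State() to inputs while outputs remains "text". In Gradio, session state passed in as an input only persists across calls when a matching gr.State also appears in outputs and the function returns the updated value; the diff does not show whether this app's predict does that. A hedged sketch of the usual pattern, with a placeholder body standing in for the real Gemma pipeline call:

import gradio as gr

def predict(message, chat_history):
    # gr.State() starts out as None on the first call of a session.
    chat_history = chat_history or []
    reply = f"echo: {message}"             # placeholder for the model call
    chat_history.append((message, reply))  # record this turn
    # Returning the history as the second value is what refreshes gr.State.
    return reply, chat_history

interface = gr.Interface(
    fn=predict,
    inputs=[gr.Textbox(label="User input"), gr.State()],
    outputs=[gr.Textbox(label="Response"), gr.State()],
    allow_flagging="never",
    live=True,
)

Separately, the hunk header shows def predict(message, chat_history=[]), a mutable default argument: the same list object is shared by every call that omits the argument, which is usually unintended for per-session chat history.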