Spaces:

wiklif
/

my-api

Sleeping

App Files Community

wiklif committed on Jul 24, 2024

Commit

8ad8716

1 Parent(s): f1cb75e

dodano accelerate i lepsze logowanie błędów

Browse files

Files changed (2) hide show

app.py +46 -19
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,31 +1,62 @@
 import spaces
 import gradio as gr
 import transformers
 import torch
-import os
 from huggingface_hub import login
 model_id = "meta-llama/Meta-Llama-3.1-8B"
 @spaces.GPU(duration=60)
 def load_pipeline():
-    # Zaloguj się używając tokena
-    login(token=os.environ.get("MY_API_LLAMA_3_1"))
-    return transformers.pipeline(
-        "text-generation",
-        model=model_id,
-        model_kwargs={"torch_dtype": torch.bfloat16},
-        device_map="auto"
-    )
-pipeline = load_pipeline()
 def generate_response(chat, kwargs):
-    output = pipeline(chat, **kwargs)[0]['generated_text']
-    if output.endswith("</s>"):
-        output = output[:-4]
-    return output
 def function(prompt, history=[]):
     chat = "<s>"
@@ -42,11 +73,7 @@ def function(prompt, history=[]):
         seed=1337
     )
-    try:
-        output = generate_response(chat, kwargs)
-        return output
-    except:
-        return ''
 # Interfejs Gradio
 interface = gr.ChatInterface(

+import os
 import spaces
 import gradio as gr
 import transformers
 import torch
 from huggingface_hub import login
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 model_id = "meta-llama/Meta-Llama-3.1-8B"
 @spaces.GPU(duration=60)
 def load_pipeline():
+    try:
+        # Zaloguj się używając tokena
+        login(token=os.environ.get("MY_API_LLAMA_3_1"))
+        logger.info("Login successful")
+        if torch.cuda.is_available():
+            logger.info(f"GPU available: {torch.cuda.get_device_name(0)}")
+            device_map = "auto"
+            torch_dtype = torch.bfloat16
+        else:
+            logger.warning("No GPU available, using CPU")
+            device_map = "cpu"
+            torch_dtype = torch.float32
+        pipeline = transformers.pipeline(
+            "text-generation",
+            model=model_id,
+            model_kwargs={"torch_dtype": torch_dtype},
+            device_map=device_map
+        )
+        logger.info("Model loaded successfully")
+        return pipeline
+    except Exception as e:
+        logger.error(f"Error loading model: {str(e)}")
+        raise
+try:
+    pipeline = load_pipeline()
+except Exception as e:
+    logger.error(f"Failed to load pipeline: {str(e)}")
+    pipeline = None
 def generate_response(chat, kwargs):
+    if pipeline is None:
+        return "Model nie został załadowany poprawnie. Proszę spróbować później."
+    try:
+        output = pipeline(chat, **kwargs)[0]['generated_text']
+        if output.endswith("</s>"):
+            output = output[:-4]
+        return output
+    except Exception as e:
+        logger.error(f"Error generating response: {str(e)}")
+        return f"Wystąpił błąd podczas generowania odpowiedzi: {str(e)}"
 def function(prompt, history=[]):
     chat = "<s>"
         seed=1337
     )
+    return generate_response(chat, kwargs)
 # Interfejs Gradio
 interface = gr.ChatInterface(

requirements.txt CHANGED Viewed

@@ -4,3 +4,4 @@ numpy<2
 torch
 transformers
 bitsandbytes

 torch
 transformers
 bitsandbytes
+accelerate