Kr08 committed
Commit e8ce33d · verified · 1 Parent(s): e499054

Update app.py

Files changed (1):
  1. app.py +43 -6
app.py CHANGED
@@ -19,21 +19,58 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 def load_qa_model():
-    """Load question-answering model"""
+    """Load question-answering model with support for long input contexts."""
     try:
-        model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-        qa_pipeline = pipeline(
-            "text-generation",
-            model="hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
-            model_kwargs={"torch_dtype": torch.bfloat16},
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+
+        model_id = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
+
+        # Load tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=os.getenv("HF_TOKEN"))
+        tokenizer.model_max_length = 8192  # Ensure the tokenizer can handle 8192 tokens
+
+        # Load the model
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            torch_dtype=torch.bfloat16,
             device_map="auto",
+            rope_scaling={
+                "type": "dynamic",  # Ensure compatibility with long contexts
+                "factor": 8.0
+            },
             use_auth_token=os.getenv("HF_TOKEN")
         )
+
+        # Load the pipeline
+        qa_pipeline = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=4096,  # Adjust as needed for your use case
+        )
+
         return qa_pipeline
+
     except Exception as e:
         logger.error(f"Failed to load Q&A model: {str(e)}")
         return None
 
+# def load_qa_model():
+#     """Load question-answering model"""
+#     try:
+#         model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+#         qa_pipeline = pipeline(
+#             "text-generation",
+#             model="hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
+#             model_kwargs={"torch_dtype": torch.bfloat16},
+#             device_map="auto",
+#             use_auth_token=os.getenv("HF_TOKEN")
+#         )
+#         return qa_pipeline
+#     except Exception as e:
+#         logger.error(f"Failed to load Q&A model: {str(e)}")
+#         return None
+
 def load_summarization_model():
     """Load summarization model"""
     try:
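
Below is a minimal usage sketch for the loader this commit introduces, assuming app.py exposes load_qa_model() as shown in the diff. The import path, document file, question text, and generation settings are illustrative placeholders, not part of the commit; the chat-template call relies on the tokenizer shipped with the Llama 3.1 checkpoint.

from app import load_qa_model  # hypothetical import path for this Space

qa_pipeline = load_qa_model()
if qa_pipeline is not None:
    long_document = open("report.txt").read()  # placeholder long input (up to ~8k tokens)
    question = "What are the key findings?"    # placeholder question

    messages = [
        {"role": "system", "content": "Answer the question using only the provided context."},
        {"role": "user", "content": f"Context:\n{long_document}\n\nQuestion: {question}"},
    ]
    # Render the model's chat template into a plain prompt string
    prompt = qa_pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # return_full_text=False keeps only the newly generated answer
    outputs = qa_pipeline(prompt, max_new_tokens=256, do_sample=False, return_full_text=False)
    print(outputs[0]["generated_text"])

One note on the change itself: switching from passing a model name string to the pipeline to loading the tokenizer and model explicitly is what makes it possible to raise tokenizer.model_max_length and pass a rope_scaling override. Whether a {"type": "dynamic"} rope_scaling override is honored for a Llama 3.1 checkpoint depends on the installed transformers version, so it is worth verifying the effective context length at runtime.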