cutechicken committed
Commit f4a0f87 • 1 Parent(s): 0b984ea

Update app.py

Files changed (1)
  1. app.py +54 -19
app.py CHANGED
@@ -12,34 +12,69 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 HF_TOKEN = os.getenv("HF_TOKEN")
 MODEL_ID = "CohereForAI/c4ai-command-r7b-12-2024"
 
+from transformers import pipeline
+
 class ModelManager:
     def __init__(self):
-        self.model = None
-        self.tokenizer = None
-        self.setup_model()
+        self.pipe = None
+        self.setup_pipeline()
 
-    def setup_model(self):
+    def setup_pipeline(self):
         try:
-            print("Starting tokenizer loading...")
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                MODEL_ID,
-                token=HF_TOKEN,
-                trust_remote_code=True
-            )
-            print("Tokenizer loading complete")
-
-            print("Starting model loading...")
-            self.model = AutoModelForCausalLM.from_pretrained(
-                MODEL_ID,
+            print("Starting pipeline initialization...")
+            self.pipe = pipeline(
+                "text-generation",
+                model=MODEL_ID,
                 token=HF_TOKEN,
                 device_map="auto",
-                trust_remote_code=True,
                 torch_dtype=torch.float16
             )
-            print("Model loading complete")
+            print("Pipeline initialization complete")
+        except Exception as e:
+            print(f"Error during pipeline initialization: {e}")
+            raise Exception(f"Pipeline initialization failed: {e}")
+
+    def generate_response(self, messages, max_tokens=4000, temperature=0.7, top_p=0.9):
+        try:
+            # Convert the message list into a single prompt string
+            prompt = ""
+            for msg in messages:
+                role = msg["role"]
+                content = msg["content"]
+                if role == "system":
+                    prompt += f"System: {content}\n"
+                elif role == "user":
+                    prompt += f"User: {content}\n"
+                elif role == "assistant":
+                    prompt += f"Assistant: {content}\n"
+
+            # Generate the response
+            response = self.pipe(
+                prompt,
+                max_new_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                num_return_sequences=1,
+                pad_token_id=self.pipe.tokenizer.eos_token_id
+            )
+
+            # Extract the response text and simulate streaming
+            generated_text = response[0]['generated_text'][len(prompt):].strip()
+            words = generated_text.split()
+
+            # Stream word by word
+            partial_response = ""
+            for word in words:
+                partial_response += word + " "
+                yield type('Response', (), {
+                    'choices': [type('Choice', (), {
+                        'delta': {'content': word + " "}
+                    })()]
+                })()
+
         except Exception as e:
-            print(f"Error occurred during model loading: {e}")
-            raise Exception(f"Model loading failed: {e}")
+            raise Exception(f"Response generation failed: {e}")
 
 class ChatHistory:
     def __init__(self):
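
Note on the new generate_response: each yielded object imitates one chunk of an OpenAI-style streaming response (chunk.choices[0].delta), so caller code written against a streaming chat client can consume it unchanged; the generation itself is not streamed, only replayed word by word (true token streaming would need something like transformers' TextIteratorStreamer). A minimal consumption sketch under that assumption (the messages and max_tokens values below are illustrative, not part of this commit):

manager = ModelManager()

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize what a text-generation pipeline does."},
]

for chunk in manager.generate_response(messages, max_tokens=128):
    # 'delta' is a plain dict here (not an attribute object as in the real
    # streaming APIs), so the content is read with dict indexing.
    print(chunk.choices[0].delta["content"], end="", flush=True)

Also note the prompt is assembled with plain "System:"/"User:"/"Assistant:" prefixes rather than the model's chat template; that keeps the code simple but may not match the format the model was tuned on.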