Commit: cosmos loading

This commit switches app.py from the Google Gemma 2 chatbot to the COSMOS Turkish Llama chatbot (ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1) and renames the previous app_cosmos.py to app_gemma.py.

Files changed:
- app.py (+32 -15)
- app_cosmos.py → app_gemma.py (renamed, +15 -32)
app.py (CHANGED)

@@ -1,26 +1,26 @@
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-from huggingface_hub import login
-import os
 
-print("Google Gemma 2 Chatbot is starting...")
+print("COSMOS Llama Chatbot is starting...")
 
-# read access token from environment variable
-access_token = os.getenv('HF_TOKEN')
-login(access_token)
-
-model_id = "google/gemma-2-9b-it"
+model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
 
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="auto",
     torch_dtype=torch.bfloat16,
+    device_map="auto",
 )
 print("Model loading completed")
 
+# this message can be changed and used as the chatbot's initial message
+initial_message = [
+    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
+    # While performing the task, think step by step and justify your steps.
+]
+
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
 
@@ -40,18 +40,35 @@ async def ask(request: Request):
         return {"error": "Prompt is missing"}
 
     print("Device of the model:", model.device)
-    messages = [
-        {"role": "user", "content": f"{prompt}"},
-    ]
+    messages = initial_message.copy()
+    messages.append({"role": "user", "content": f"{prompt}"})
+
     print("Messages:", messages)
    print("Tokenizer process started")
-    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True).to("cuda")
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model.device)
+
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
     print("Tokenizer process completed")
 
     print("Model process started")
-    outputs = model.generate(**input_ids, max_new_tokens=256)
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=256,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+    response = outputs[0][input_ids.shape[-1]:]
 
     print("Tokenizer decode process started")
-    answer = tokenizer.decode(outputs[0]).split("<end_of_turn>")[1].strip()
+    answer = tokenizer.decode(response, skip_special_tokens=True)
 
     return {"answer": answer}
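The hunk header shows the handler async def ask(request: Request): it reads a prompt from the request body and returns {"answer": ...}. As a usage sketch, a client call against the updated app could look like the following; the /ask route, the "prompt" JSON key, and the host/port are assumptions inferred from the handler name and the "Prompt is missing" error, since the route declaration itself falls outside the hunks shown.

import requests

# Hypothetical call: the "/ask" path, "prompt" key, and port 7860 (the usual
# Hugging Face Spaces default) are assumptions, not taken from the diff.
resp = requests.post(
    "http://localhost:7860/ask",
    json={"prompt": "Merhaba! Kendini kısaca tanıtır mısın?"},  # "Hello! Can you briefly introduce yourself?"
)
print(resp.json()["answer"])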
app_cosmos.py → app_gemma.py (RENAMED)

@@ -1,26 +1,26 @@
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+from huggingface_hub import login
+import os
 
-print("COSMOS Llama Chatbot is starting...")
+print("Google Gemma 2 Chatbot is starting...")
 
-model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
+# read access token from environment variable
+access_token = os.getenv('HF_TOKEN')
+login(access_token)
+
+model_id = "google/gemma-2-9b-it"
 
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.bfloat16,
     device_map="auto",
+    torch_dtype=torch.bfloat16,
 )
 print("Model loading completed")
 
-# this message can be changed and used as the chatbot's initial message
-initial_message = [
-    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
-    # While performing the task, think step by step and justify your steps.
-]
-
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
 
@@ -40,35 +40,18 @@ async def ask(request: Request):
         return {"error": "Prompt is missing"}
 
     print("Device of the model:", model.device)
-    messages = initial_message.copy()
-    messages.append({"role": "user", "content": f"{prompt}"})
-
+    messages = [
+        {"role": "user", "content": f"{prompt}"},
+    ]
     print("Messages:", messages)
     print("Tokenizer process started")
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model.device)
-
-    terminators = [
-        tokenizer.eos_token_id,
-        tokenizer.convert_tokens_to_ids("<|eot_id|>")
-    ]
+    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True).to("cuda")
     print("Tokenizer process completed")
 
     print("Model process started")
-    outputs = model.generate(
-        input_ids,
-        max_new_tokens=256,
-        eos_token_id=terminators,
-        do_sample=True,
-        temperature=0.6,
-        top_p=0.9,
-    )
-    response = outputs[0][input_ids.shape[-1]:]
+    outputs = model.generate(**input_ids, max_new_tokens=256)
 
     print("Tokenizer decode process started")
-    answer = tokenizer.decode(response, skip_special_tokens=True)
+    answer = tokenizer.decode(outputs[0]).split("<end_of_turn>")[1].strip()
 
     return {"answer": answer}
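A notable difference between the two apps is how each extracts the reply from the generated sequence. The COSMOS version slices off the prompt tokens (response = outputs[0][input_ids.shape[-1]:]) and decodes with skip_special_tokens=True, while the Gemma version decodes the whole sequence and splits on Gemma's <end_of_turn> marker. Below is a minimal sketch of that Gemma-style string handling, using a hand-written stand-in for the decoded text; the exact special-token layout is an assumption about Gemma's chat template, not captured model output.

# Stand-in for tokenizer.decode(outputs[0]); the token layout is an assumed
# example of Gemma chat formatting.
decoded = (
    "<bos><start_of_turn>user\n"
    "What is FastAPI?<end_of_turn>\n"
    "<start_of_turn>model\n"
    "FastAPI is a Python web framework for building APIs.<end_of_turn>"
)

# split()[1] is everything between the first and second <end_of_turn>, i.e.
# the model's turn; note it still carries its "<start_of_turn>model" header.
answer = decoded.split("<end_of_turn>")[1].strip()
print(answer)

Slicing by input_ids.shape[-1], as the COSMOS version does, sidesteps this marker parsing entirely and leaves special-token removal to the decoder instead.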