Spaces:

eneSadi
/

cosmos-llama-flask

Sleeping

App Files Community

eneSadi committed on Jan 29

Commit

0235536

unverified ·

1 Parent(s): 1545115

activate NER extraction

Browse files

Files changed (2) hide show

app.py +41 -15
app_keyword_ner.py → app_gemma.py +15 -41

app.py CHANGED Viewed

@@ -1,26 +1,35 @@
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-from huggingface_hub import login
-import os
-print("Google Gemma 2 Chatbot is starting...")
-# read access token from environment variable
-access_token = os.getenv('HF_TOKEN')
-login(access_token)
-model_id = "google/gemma-2-9b-it"
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="auto",
     torch_dtype=torch.bfloat16,
 )
 print("Model loading completed")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
@@ -40,18 +49,35 @@ async def ask(request: Request):
         return {"error": "Prompt is missing"}
     print("Device of the model:", model.device)
-    messages = [
-        {"role": "user", "content": f"{prompt}"},
-    ]
     print("Messages:", messages)
     print("Tokenizer process started")
-    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True).to("cuda")
     print("Tokenizer process completed")
     print("Model process started")
-    outputs = model.generate(**input_ids, max_new_tokens=512)
     print("Tokenizer decode process started")
-    answer = tokenizer.decode(outputs[0]).split("<end_of_turn>")[1].strip()
     return {"answer": answer}

 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+print("COSMOS Llama Chatbot is starting...")
+model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.bfloat16,
+    device_map="auto",
 )
 print("Model loading completed")
+# bu mesaj değiştirilebilir ve chatbotun başlangıç mesajı olarak kullanılabilir
+initial_message = [
+    {"role": "system", "content":
+        """Kullanıcı sana bir haber metni verecek. Bu haber metninin önemli kısımlarını özetleyen 5 cümle çıkart. Aynı zamanda bu cümlelerin her birinden bir keyword extract et ve eğer varsa NER ile yer, kişi, tarih gibi alanları extract et. Yoksa karşısını boş bırak. Çıktıların şu formatta olsun:
+    1. Cümle: Cumhurbaşkanı Erdoğan tatile çıktı.
+    Keyword: tatil
+    NER: Cumhurbaşkanı Erdoğan
+    2. Cümle: ...
+    Keyword: ...
+    NER: ...
+    """
+     }
+]
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
         return {"error": "Prompt is missing"}
     print("Device of the model:", model.device)
+    messages = initial_message.copy()
+    messages.append({"role": "user", "content": f"{prompt}"})
     print("Messages:", messages)
     print("Tokenizer process started")
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model.device)
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
     print("Tokenizer process completed")
     print("Model process started")
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=512,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+    response = outputs[0][input_ids.shape[-1]:]
     print("Tokenizer decode process started")
+    answer = tokenizer.decode(response, skip_special_tokens=True)
     return {"answer": answer}

app_keyword_ner.py → app_gemma.py RENAMED Viewed

@@ -1,35 +1,26 @@
 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-print("COSMOS Llama Chatbot is starting...")
-model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.bfloat16,
     device_map="auto",
 )
 print("Model loading completed")
-# bu mesaj değiştirilebilir ve chatbotun başlangıç mesajı olarak kullanılabilir
-initial_message = [
-    {"role": "system", "content":
-        """Kullanıcı sana bir haber metni verecek. Bu haber metninin önemli kısımlarını özetleyen 5 cümle çıkart. Aynı zamanda bu cümlelerin her birinden bir keyword extract et ve eğer varsa NER ile yer, kişi, tarih gibi alanları extract et. Yoksa karşısını boş bırak. Çıktıların şu formatta olsun:
-    1. Cümle: Cumhurbaşkanı Erdoğan tatile çıktı.
-    Keyword: tatil
-    NER: Cumhurbaşkanı Erdoğan
-    2. Cümle: ...
-    Keyword: ...
-    NER: ...
-    """
-     }
-]
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
@@ -49,35 +40,18 @@ async def ask(request: Request):
         return {"error": "Prompt is missing"}
     print("Device of the model:", model.device)
-    messages = initial_message.copy()
-    messages.append({"role": "user", "content": f"{prompt}"})
     print("Messages:", messages)
     print("Tokenizer process started")
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model.device)
-    terminators = [
-        tokenizer.eos_token_id,
-        tokenizer.convert_tokens_to_ids("<|eot_id|>")
-    ]
     print("Tokenizer process completed")
     print("Model process started")
-    outputs = model.generate(
-        input_ids,
-        max_new_tokens=512,
-        eos_token_id=terminators,
-        do_sample=True,
-        temperature=0.6,
-        top_p=0.9,
-    )
-    response = outputs[0][input_ids.shape[-1]:]
     print("Tokenizer decode process started")
-    answer = tokenizer.decode(response, skip_special_tokens=True)
     return {"answer": answer}

 from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+from huggingface_hub import login
+import os
+print("Google Gemma 2 Chatbot is starting...")
+# read access token from environment variable
+access_token = os.getenv('HF_TOKEN')
+login(access_token)
+model_id = "google/gemma-2-9b-it"
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
+    torch_dtype=torch.bfloat16,
 )
 print("Model loading completed")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
         return {"error": "Prompt is missing"}
     print("Device of the model:", model.device)
+    messages = [
+        {"role": "user", "content": f"{prompt}"},
+    ]
     print("Messages:", messages)
     print("Tokenizer process started")
+    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True).to("cuda")
     print("Tokenizer process completed")
     print("Model process started")
+    outputs = model.generate(**input_ids, max_new_tokens=512)
     print("Tokenizer decode process started")
+    answer = tokenizer.decode(outputs[0]).split("<end_of_turn>")[1].strip()
     return {"answer": answer}