Spaces:

eneSadi
/

cosmos-llama-flask

Sleeping

App Files Files Community

cosmos-llama-flask / app.py

eneSadi

cosmos loading

ff9863c unverified 15 days ago

raw

history blame contribute delete

2.12 kB

	from fastapi import FastAPI, Request
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch

	# Startup banner. This and every statement below runs at import time.
	print("COSMOS Llama Chatbot is starting...")

	# Model identifier handed to both the tokenizer loader and the model loader.
	model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"

	print("Model loading started")
	tokenizer = AutoTokenizer.from_pretrained(model_id)
	model = AutoModelForCausalLM.from_pretrained(
	model_id,
	# Request bfloat16 weights.
	torch_dtype=torch.bfloat16,
	# NOTE(review): presumably lets the library decide device placement —
	# confirm against the transformers documentation.
	device_map="auto",
	)
	print("Model loading completed")

	# This message can be changed; it is used as the chatbot's initial message.
	# It is a one-element list holding the system-role turn that /ask prepends
	# to every conversation.
	initial_message = [
	{"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
	# Disabled note (looks like an optional extra system-prompt sentence — confirm):
	# "Think step by step while carrying out the task and justify your steps."
	]

	# Pick CUDA when available, otherwise CPU. In the code visible here this value
	# is only printed; the /ask handler moves its inputs to model.device instead.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	print("Selected device:", device)

	# ASGI application object that the route decorators below register against.
	app = FastAPI()


	@app.get('/')
	def home():
	    # Root endpoint: answers GET / with a fixed greeting payload.
	    greeting = {"hello": "Bitfumes"}
	    return greeting


	@app.post('/ask')
	async def ask(request: Request):
	    """Generate a chat answer for the prompt carried in the JSON request body.

	    The body is expected to be a JSON object with a non-empty ``"prompt"``
	    field. The prompt is appended as a user turn after the module-level
	    ``initial_message`` turns, rendered through the tokenizer's chat
	    template, and passed to ``model.generate`` with sampling enabled.

	    Returns:
	        ``{"answer": <generated text>}`` on success, or
	        ``{"error": <reason>}`` when the body is not valid JSON, is not a
	        JSON object, or carries no prompt.
	    """
	    # A malformed body makes request.json() raise a ValueError subclass
	    # (json.JSONDecodeError / UnicodeDecodeError); report it to the caller
	    # instead of letting it surface as an unhandled server error.
	    try:
	        data = await request.json()
	    except ValueError:
	        return {"error": "Invalid JSON body"}

	    # A JSON array or scalar has no .get(); treat it like a missing prompt.
	    prompt = data.get("prompt") if isinstance(data, dict) else None
	    if not prompt:
	        return {"error": "Prompt is missing"}

	    print("Device of the model:", model.device)
	    # Build a fresh list per request so the shared initial_message list is
	    # never mutated; str() keeps non-string prompts (e.g. numbers) usable.
	    messages = [*initial_message, {"role": "user", "content": str(prompt)}]

	    print("Messages:", messages)
	    print("Tokenizer process started")
	    input_ids = tokenizer.apply_chat_template(
	        messages,
	        add_generation_prompt=True,
	        return_tensors="pt",
	    ).to(model.device)

	    # Stop generation on either the tokenizer's EOS token or the
	    # end-of-turn token. The token text must be the literal "<|eot_id|>"
	    # with no escape backslashes, otherwise the vocabulary lookup misses.
	    terminators = [
	        tokenizer.eos_token_id,
	        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
	    ]
	    print("Tokenizer process completed")

	    print("Model process started")
	    # NOTE(review): generate() is a synchronous call inside an async handler,
	    # so other requests presumably wait while it runs — confirm whether
	    # offloading to a worker thread is wanted for this deployment.
	    outputs = model.generate(
	        input_ids,
	        max_new_tokens=256,
	        eos_token_id=terminators,
	        do_sample=True,
	        temperature=0.6,
	        top_p=0.9,
	    )
	    # Skip the first input_ids.shape[-1] positions (the prompt) so that only
	    # the newly generated tokens are decoded.
	    response = outputs[0][input_ids.shape[-1]:]

	    print("Tokenizer decode process started")
	    answer = tokenizer.decode(response, skip_special_tokens=True)

	    return {"answer": answer}