from fastapi import FastAPI, HTTPException
from llama_cpp import Llama

app = FastAPI()

# Load the quantized TinyLlama chat model from a local GGUF file.
llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")

@app.post("/stream")  # route path assumed from the function name
async def stream(item: dict):
    if "prompt" not in item:
        raise HTTPException(status_code=400, detail="prompt is required")
    prompt = item["prompt"]
    # Optional sampling parameters with defaults.
    temperatura = item.get("temperatura", 0.2)
    max_tokens = item.get("max_tokens", 512)
    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)
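For reference, a minimal client sketch. The host, port, and the /stream path are assumptions (not confirmed by the source); the response shape follows llama_cpp's completion format.

import requests

# Hypothetical local call; host, port, and the /stream path are assumed.
resp = requests.post(
    "http://localhost:8000/stream",
    json={"prompt": "Explain GGUF in one sentence.", "temperatura": 0.2},
)
resp.raise_for_status()
# llama_cpp completions return {"choices": [{"text": ...}], ...}
print(resp.json()["choices"][0]["text"])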