Spaces: Runtime error
from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI()

# Fetch the GGUF weights from the Hub at startup so the model file actually
# exists inside the Space; a hard-coded local path that was never downloaded
# is a common cause of this kind of runtime error. The repo_id here assumes
# TheBloke's GGUF conversion of Llama 2 7B Chat.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGUF",
    filename="llama-2-7b-chat.Q4_K_M.gguf",
)

# Load the model once at import time rather than on every request
llm = Llama(
    model_path=model_path,
    # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
    # seed=1337,        # Uncomment to set a specific seed
    # n_ctx=2048,       # Uncomment to increase the context window
)

@app.get("/llama")
def llama():
    # Generate a completion; create_completion can be called instead
    output = llm(
        "Q: Name the planets in the solar system? A: ",  # Prompt
        max_tokens=32,      # Up to 32 tokens; None generates to the end of the context window
        stop=["Q:", "\n"],  # Stop just before the model would start a new question
        echo=True,          # Echo the prompt back in the output
    )
    return {"output": output}
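A quick way to exercise the endpoint, assuming the file above is saved as app.py (Spaces expect the server on port 7860; locally any free port works):

uvicorn app:app --host 0.0.0.0 --port 7860
curl http://localhost:7860/llama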