Spaces:
Runtime error
Runtime error
File size: 919 Bytes
9bf2007 bff48c8 dcd2d54 3e6fc0f 9bf2007 e48a0c0 2d5d217 e5e2748 3e6fc0f 9bf2007 e48a0c0 b63fd3c e48a0c0 e5e2748 9bf2007 e48a0c0 9bf2007 e48a0c0 9bf2007 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import os
import threading

import torch
import transformers
from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from transformers import AutoTokenizer
# Application object; the route decorator below registers its handler on it.
app = FastAPI()
# Path to the quantized GGUF weights, relative to the process working directory.
_MODEL_PATH = "./llama-2-7b-chat.Q4_K_M.gguf"

# Process-wide model handle, created on first use, and the lock that guards
# both its creation and every inference call made through it.
_llm = None
_llm_lock = threading.Lock()


def _get_llm():
    """Return the shared ``Llama`` instance, loading the model on first use.

    The caller must hold ``_llm_lock``. If loading raises, ``_llm`` stays
    ``None`` so a later call retries the load.
    """
    global _llm
    if _llm is None:
        _llm = Llama(
            model_path=_MODEL_PATH,
            # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
            # seed=1337,  # Uncomment to set a specific seed
            # n_ctx=2048,  # Uncomment to increase the context window
        )
    return _llm


@app.get("/")
def llama():
    """Run a fixed demo prompt through the model and return the completion.

    The model is loaded once per process and reused, instead of being
    re-read from disk on every request.

    Returns:
        A dict ``{"output": completion}`` where ``completion`` is whatever
        the ``Llama`` call returns for the prompt (prompt echoed, at most
        32 generated tokens, stopping before ``"Q:"`` or a newline).
    """
    # A plain ``def`` handler may be invoked from several worker threads;
    # the single shared model object is therefore used under a lock.
    with _llm_lock:
        output = _get_llm()(
            "Q: Name the planets in the solar system? A: ",  # Prompt
            max_tokens=32,  # Generate up to 32 tokens; None means up to the end of the context window
            stop=["Q:", "\n"],  # Stop just before the model would generate a new question
            echo=True,  # Echo the prompt back in the output
        )  # Generate a completion; create_completion can also be called directly
    return {"output": output}
|