import os

from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
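
# The original file imports hf_hub_download without using it. A minimal sketch
# of the likely intent, assuming the quantized weights come from the
# TheBloke/Llama-2-7B-Chat-GGUF repo on the Hugging Face Hub (an assumption,
# not confirmed by this file): fetch the GGUF file once if it is not already
# present next to the app.
if not os.path.exists("./llama-2-7b-chat.Q4_K_M.gguf"):
    hf_hub_download(
        repo_id="TheBloke/Llama-2-7B-Chat-GGUF",  # assumed source repo
        filename="llama-2-7b-chat.Q4_K_M.gguf",
        local_dir=".",  # place the file where model_path below expects it
    )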

app = FastAPI()

# Load the model once at startup; constructing Llama inside the request
# handler would reload the multi-gigabyte weights on every request.
llm = Llama(
    model_path="./llama-2-7b-chat.Q4_K_M.gguf",
    # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
    # seed=1337,  # Uncomment to set a specific seed
    # n_ctx=2048,  # Uncomment to increase the context window
)


@app.get("/")
def llama():
    # Generate a completion; calling create_completion() directly is equivalent.
    output = llm(
        "Q: Name the planets in the solar system? A: ",  # Prompt
        max_tokens=32,  # Generate up to 32 tokens; set to None to run to the end of the context window
        stop=["Q:", "\n"],  # Stop just before the model would generate a new question
        echo=True,  # Echo the prompt back in the output
    )

    return {"output": output}