Spaces:
Running
Running
import gradio as gr | |
from ctransformers import AutoModelForCausalLM | |
model_name = 'TheBloke/Llama-2-7B-GGUF'
# NOTE(review): model_name is not referenced by the load call below, which
# passes a different repo id ("...-7b-Chat-GGUF") -- confirm which is intended.

# Load the quantized chat model once at import time so every request reuses it.
# gpu_layers is the number of layers to offload to the GPU; keep it at 0 when
# the system has no GPU acceleration.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7b-Chat-GGUF",
    model_file="llama-2-7b-chat.Q4_K_M.gguf",
    model_type="llama",
    gpu_layers=0,
)
def greet(name):
    """Print a sample model completion and return a greeting for ``name``.

    The module-level ``llm`` is called with the fixed prompt
    "AI is going to"; its result is written to stdout only. The returned
    string is "Hello " followed by ``name`` and "!!".
    """
    # NOTE(review): the prompt does not depend on ``name`` and the completion
    # is never returned to the caller -- confirm this is intentional.
    sample = llm("AI is going to")
    print(sample)

    greeting = "Hello " + name + "!!"
    return greeting
# Wire greet into a Gradio interface with one text input and one text output,
# then start it.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
iface.launch()