import gradio as gr
from ctransformers import AutoModelForCausalLM

model_name = "TheBloke/Llama-2-7b-Chat-GGUF"

# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained(model_name, model_file="llama-2-7b-chat.Q4_K_M.gguf", model_type="llama", gpu_layers=0)
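# With a CUDA- or Metal-enabled build of ctransformers, layers can be offloaded to the GPU instead.
# Sketch only; a suitable layer count depends on available VRAM:
# llm = AutoModelForCausalLM.from_pretrained(model_name, model_file="llama-2-7b-chat.Q4_K_M.gguf", model_type="llama", gpu_layers=50)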

def greet(name):
    # Run a short sample generation and log it to the console, then return the greeting.
    print(llm("AI is going to"))
    return "Hello " + name + "!!"

iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
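
# A minimal sketch of returning the model's completion through the interface instead of a
# fixed greeting (max_new_tokens and temperature values below are illustrative):
#
# def complete(prompt):
#     return llm(prompt, max_new_tokens=128, temperature=0.7)
#
# gr.Interface(fn=complete, inputs="text", outputs="text").launch()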