Spaces:
Sleeping
Sleeping
File size: 1,307 Bytes
31c4621 a0de172 31c4621 87ad6b5 31c4621 194b12e d04a492 384780e 31c4621 5254778 31c4621 5254778 384780e d04a492 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import streamlit as st
from huggingface_hub import InferenceClient
# Streamlit front end that sends a single user prompt to a Hugging Face hosted
# chat model and renders the reply.

# Read the Hugging Face API token from Streamlit's secrets store.
hf_token = st.secrets["Hf_Token"]

# Streamlit App UI
st.title("LLM Model Inference with Streamlit")

# Dropdown for choosing which hosted chat model answers the prompt
# (the list mixes Llama and Gemma checkpoints).
model_id = st.selectbox(
    "Select a LLaMA model",
    [
        "meta-llama/Llama-3.2-3B-Instruct",
        "google/gemma-7b",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Meta-Llama-3-8B-Instruct",
    ],  # Replace with the correct model names
)

# Chat-style input box; yields None until the user submits a prompt.
user_input = st.chat_input("Enter a text prompt:")

# Initialize the Inference Client
client = InferenceClient(token=hf_token)

# Run inference only once the user has submitted a prompt.
if user_input:
    response = None
    with st.spinner(f"Generating text using {model_id}..."):
        # Only the remote call sits inside the try so that unrelated errors
        # are not masked. This is the UI boundary, so a broad catch is used to
        # turn any failure (gated/unavailable model, bad token, timeout,
        # network error) into a visible message instead of a raw traceback.
        try:
            response = client.chat.completions.create(
                model=model_id,
                messages=[
                    {"role": "system", "content": "You are a question answering assistant."},
                    {"role": "user", "content": user_input},
                ],
                max_tokens=500,
                stream=False,
            )
        except Exception as exc:
            st.error(f"Inference failed for {model_id}: {exc}")

    if response is not None:
        st.success("Text generated!")
        st.write(response['choices'][0]['message']['content'])
else:
    st.warning("Please enter a prompt to generate text.")
|