import streamlit as st
from huggingface_hub import InferenceClient

# Access your Hugging Face token from Streamlit secrets
# (.streamlit/secrets.toml locally, or the platform's secrets settings when deployed)
hf_token = st.secrets["Hf_Token"]

# Streamlit app UI
st.title("LLM Model Inference with Streamlit")

# Dropdown for selecting a model
model_id = st.selectbox(
    "Select a model",
    [
        "meta-llama/Llama-3.2-3B-Instruct",
        "google/gemma-7b",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Meta-Llama-3-8B-Instruct",
    ],  # Replace with model names your token has access to
)

user_input = st.chat_input("Enter a text prompt:")

# Initialize the Inference Client with the token
client = InferenceClient(token=hf_token)

# Run inference once the user submits a prompt
if user_input:
    with st.spinner(f"Generating text using {model_id}..."):
        # Perform a chat-completion request against the selected model
        response = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": "You are a question answering assistant."},
                {"role": "user", "content": user_input},
            ],
            max_tokens=500,
            stream=False,
        )
    st.success("Text generated!")
    st.write(response.choices[0].message.content)
else:
    st.warning("Please enter a prompt to generate text.")