import streamlit as st
from huggingface_hub import InferenceClient

# Access your Hugging Face token. st.secrets reads from .streamlit/secrets.toml
# (or your deployment's secrets settings); the key name must match exactly.
hf_token = st.secrets["Hf_Token"]

# Streamlit app UI
st.title("LLM Model Inference with Streamlit")

# Dropdown for selecting between Llama 3 models
model_id = st.selectbox(
    "Select a LLaMA model",
    [
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Meta-Llama-3-8B-Instruct",
    ],  # Replace with the model names you have access to
)

user_input = st.chat_input("Enter a text prompt:")

# Initialize the Inference Client
client = InferenceClient(token=hf_token)

# Run inference once the user submits a prompt via the chat input
if user_input:
    with st.spinner(f"Generating text using {model_id}..."):
        # Perform chat-completion inference with the selected model
        response = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": "You are a question answering assistant."},
                {"role": "user", "content": user_input},
            ],
            max_tokens=500,
            stream=False,
        )
    st.success("Text generated!")
    # The client returns a dataclass, so read the reply via attribute access
    st.write(response.choices[0].message.content)
else:
    st.warning("Please enter a prompt to generate text.")
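
# Optional: a minimal streaming sketch (an illustrative assumption, not part of
# the app above). With stream=True, client.chat.completions.create yields chunks
# whose text lives in chunk.choices[0].delta.content, and st.write_stream
# (available in Streamlit 1.31+) renders them incrementally instead of waiting
# for the full response. To try it, replace the call above with something like:
#
# def stream_tokens():
#     for chunk in client.chat.completions.create(
#         model=model_id,
#         messages=[{"role": "user", "content": user_input}],
#         max_tokens=500,
#         stream=True,
#     ):
#         delta = chunk.choices[0].delta.content
#         if delta:  # delta can be None on some chunks
#             yield delta
#
# st.write_stream(stream_tokens())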