"""Minimal Streamlit chat app: generate a reply to the user's prompt with a
local llama.cpp model (Meta-Llama-3.1-8B-Instruct, Q4_K_M GGUF)."""

import subprocess
import sys
import time

import streamlit as st


def install(package):
    """Install *package* into the current interpreter with pip.

    Raises:
        subprocess.CalledProcessError: if the pip invocation fails.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


# Install llama-cpp-python only when it is actually missing. The original
# ran pip unconditionally on every Streamlit rerun, adding network/pip
# latency to each interaction.
try:
    from llama_cpp import Llama
except ImportError:
    install("llama-cpp-python")
    from llama_cpp import Llama


@st.cache_resource
def load_model():
    """Load the GGUF model once per server process.

    The original constructed a fresh Llama (a multi-gigabyte model load)
    on every submitted prompt; st.cache_resource keeps a single instance
    alive across Streamlit reruns.
    """
    return Llama(model_path="Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")


prompt = st.chat_input("Say something")
if prompt:
    llm = load_model()
    print(f"Started {time.time()}")
    start = time.time()
    r = llm(prompt, max_tokens=1000)
    end = time.time()
    print(f"The Generation time for 1000 tokens is : {end - start}")
    # Placeholder shown if the completion payload is malformed.
    response_text = "Nothing"
    try:
        response_text = r["choices"][0]["text"]
    except (KeyError, IndexError, TypeError) as e:
        # Narrowed from `except Exception`: only shape errors in the
        # completion dict are expected here; anything else should surface.
        print(e)
    st.write(f"User has sent the following prompt: {prompt} with response: {response_text} ")