from llama_cpp import Llama
from langchain_huggingface import HuggingFaceEmbeddings
import streamlit as st

# Only needed if the commented-out Ollama backend below is re-enabled.
# from langchain_ollama import OllamaLLM


@st.cache_resource
def initialize_llm(model_name, temperature, top_p, max_tokens):
    # Previous backend: a remote Ollama server, kept for reference.
    # llm = OllamaLLM(
    #     model=model_name,
    #     base_url="https://deepak7376-ollama-server.hf.space",
    #     temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
    #     max_tokens=max_tokens,    # Limit the number of tokens in the output
    #     top_p=top_p,              # Nucleus sampling for controlling diversity
    # )

    # Current backend: a quantized GGUF model pulled from the Hugging Face Hub
    # and run locally via llama-cpp-python.
    # Note: in llama-cpp-python, temperature and top_p are sampling-time
    # parameters passed when the model is called, not at construction; here
    # max_tokens is reused as the context window size (n_ctx).
    llm = Llama.from_pretrained(
        repo_id="bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
        filename="DeepSeek-R1-Distill-Qwen-1.5B-IQ4_XS.gguf",
        n_ctx=max_tokens,
    )
    return llm


@st.cache_resource
def initialize_embeddings():
    # Sentence-transformers model used to embed documents and queries.
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings
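
# Usage sketch (illustrative, not part of the original module): how the cached
# resources might be wired into a Streamlit page, and where the sampling
# parameters actually take effect in llama-cpp-python. The model-name string,
# parameter values, and prompt handling below are assumptions for demonstration.
#
# llm = initialize_llm("deepseek-r1-distill-qwen-1.5b",
#                      temperature=0.7, top_p=0.9, max_tokens=2048)
# embeddings = initialize_embeddings()
#
# question = st.text_input("Ask a question")
# if question:
#     # Sampling parameters are applied per call, not at model construction.
#     result = llm(question, max_tokens=256, temperature=0.7, top_p=0.9)
#     st.write(result["choices"][0]["text"])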