# NOTE(review): the lines "Spaces: / Running / Running" below the original file
# header were Hugging Face Spaces UI banner text captured by a web-page paste,
# not code; preserved here as a comment so the module parses.
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM
from llama_cpp import Llama
import streamlit as st
def initialize_llm(model_name, temperature, top_p, max_tokens):
    """Load the local GGUF chat model via llama-cpp-python.

    Parameters
    ----------
    model_name : str
        Currently unused — the repo id and filename below are hard-coded.
        Kept in the signature so existing callers keep working.
        TODO: wire this through to ``from_pretrained``.
    temperature : float
        Sampling temperature. NOTE(review): ``Llama``'s constructor does not
        accept this; it must be passed at generation time
        (e.g. ``llm(prompt, temperature=temperature, top_p=top_p)``),
        otherwise it is silently ignored — confirm call sites.
    top_p : float
        Nucleus-sampling cutoff. Same caveat as *temperature*.
    max_tokens : int
        Used here as the model context window (``n_ctx``).
        NOTE(review): context size and max *output* tokens are different
        knobs — confirm this mapping is intended.

    Returns
    -------
    llama_cpp.Llama
        The loaded model instance.
    """
    # The earlier remote OllamaLLM backend (hf.space server) was removed as
    # dead commented-out code; recover it from version control if needed.
    llm = Llama.from_pretrained(
        repo_id="bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
        filename="DeepSeek-R1-Distill-Qwen-1.5B-IQ4_XS.gguf",
        n_ctx=max_tokens,
    )
    return llm
def initialize_embeddings():
    """Build the sentence-embedding model used for document retrieval.

    Returns
    -------
    HuggingFaceEmbeddings
        LangChain wrapper around the ``all-MiniLM-L6-v2``
        sentence-transformers model (downloaded on first use).
    """
    return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")