# https://docs.google.com/document/d/1hY5ItC8Mewyk-90Q--CGr50wBbZBjPrkYu4NtiBVre4/edit?usp=sharing
# Inference takes 6-7 mins per query
import logging
import sys

import gradio as gr
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Set up logging
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
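
# Build a local LlamaCPP LLM from a quantized GGUF checkpoint hosted on Hugging Face.
# n_gpu_layers=-1 asks llama.cpp to offload all layers to the GPU when one is available.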
def configure_llama_model():
    # model_url = 'https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q8_0.gguf'
    model_url = 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf'
    llm = LlamaCPP(
        model_url=model_url,
        temperature=0.3,
        max_new_tokens=256,
        context_window=3900,
        model_kwargs={"n_gpu_layers": -1},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    return llm
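
# Sentence-transformers model used to embed document chunks for retrieval.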
def configure_embeddings():
    embed_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return embed_model
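
# Bundle the LLM and embedding model; chunk_size=250 controls how documents are split for indexing.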
def configure_service_context(llm, embed_model):
    return ServiceContext.from_defaults(chunk_size=250, llm=llm, embed_model=embed_model)
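
# Read every file under data_path and build an in-memory vector index over the chunks.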
def initialize_vector_store_index(data_path, service_context):
    documents = SimpleDirectoryReader(data_path).load_data()
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    return index

# Configure and initialize components
llm = configure_llama_model()
embed_model = configure_embeddings()
service_context = configure_service_context(llm, embed_model)
index = initialize_vector_store_index("./", service_context)
query_engine = index.as_query_engine()

# Chat handler for Gradio: gr.ChatInterface calls this with (message, history);
# only the message text is used and the chat history is ignored.
def get_response(message, history):
    response = str(query_engine.query(message))
    return response

gr.ChatInterface(get_response).launch(debug=True, share=True)