# seminarLabAI / seminar_edition_ai.py
import os
from datetime import datetime
from pathlib import Path

import chromadb
import pdfkit
from pypdf import PdfReader

from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain_chroma import Chroma

from llm_call import GeminiLLM, SermonGeminiPromptTemplate
bookQuestion = dict()
llm = None
embed_model = None
retriever = None
contemplandoQuestion = {
'DEVOCIONALMENTE':'¿Cómo estimula Dios su corazón a través de Su Palabra?',
    'EXÉGESIS':'¿Cuál es el contexto de este pasaje?',
'CRISTO':'¿Cómo se comprende este texto a la luz de Cristo?',
'ARCO REDENTOR':'¿Cómo encaja este texto en la metanarrativa de las Escrituras?',
'EVANGELION': '¿Cómo se declara el evangelio en este texto?',
'EVANGELION_TWO': '¿Cómo interpretamos este texto a la luz del evangelio?',
}
proclamandoQuestion = {
'PÚBLICO':'¿Cuáles son los ídolos en los corazones de las personas que rechazarían el evangelio de Cristo?',
'HISTORIA':'¿Cómo el guión de su predicación comunica la historia de Dios?',
'EXPECTATIVAS': '¿Qué espera Dios que hagan como respuesta a esta predicación?',
'EXPECTATIVAS_TWO': '¿Cuáles son sus expectativas divinas como predicador de este mensaje?',
}
bookQuestion['Contemplando'] = contemplandoQuestion
bookQuestion['Proclamando'] = proclamandoQuestion
HISTORY_ANSWER = ""
DIRECTORY_PATH_TO_DOWNLOAD = 'data/sermon_lab_ai/download_files'
if not os.path.exists(DIRECTORY_PATH_TO_DOWNLOAD):
    os.makedirs(DIRECTORY_PATH_TO_DOWNLOAD)
def getCurrentFileName():
now = datetime.now()
strNow = now.strftime("%m%d%Y_%H%M%S")
return f"sermonDay_{strNow}.pdf"
fileAddressToDownload = f"{DIRECTORY_PATH_TO_DOWNLOAD}{os.sep}{getCurrentFileName()}"
FILE_PATH_NAME = fileAddressToDownload
def updatePromptTemplate(
llmModel,
promptTemplate,
inputVariablesTemplate
):
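    """Build a 'stuff' question-answering chain from the given prompt template.

    If llmModel is None, a default GeminiLLM instance is created.
    """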
prompt = PromptTemplate(template = promptTemplate,
input_variables = inputVariablesTemplate)
    if llmModel is None:
llmBuilder = GeminiLLM()
llmModel = llmBuilder.getLLM()
chain = load_qa_chain(
llmModel,
chain_type = "stuff",
prompt = prompt
)
return chain
def predict(query):
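    """Answer a sermon-preparation question through the RAG retriever.

    The current sermon draft (HISTORY_ANSWER) is passed to the chain as
    SERMON_CONTEXT, and the cleaned answer is stored back into it.
    """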
templates = SermonGeminiPromptTemplate()
    chain = updatePromptTemplate(
        None,  # no model supplied here; updatePromptTemplate builds a default GeminiLLM
        templates.getSermonPromptTemplate('BUILD_PREPARE_QUESTIONS'),
        ['question', 'SERMON_CONTEXT', 'context']
    )
if query != '':
        global retriever
        global HISTORY_ANSWER
answer = askQuestion(
query,
chain,
retriever,
topic = query,
KEY = 'question'
)
answer = (answer.split("<|assistant|>")[-1]).strip()
HISTORY_ANSWER = answer
return answer
else:
return query
def predictContemplando(queryKey):
    # Call the LLM through the LangChain inference chain
query = contemplandoQuestion[queryKey]
return predict(query)
def predictProclamando(queryKey):
    # Call the LLM through the LangChain inference chain
query = proclamandoQuestion[queryKey]
return predict(query)
####
#
####
def predictFromInit( sermonTopic, llmModelList = []):
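    """Generate the opening sermon draft for sermonTopic.

    The first call (empty HISTORY_ANSWER) uses the BUILD_INIT template; later
    calls use BUILD_EMPTY keyed by BIBLE_VERSICLE. The answer is indexed into
    the local Chroma store and kept as the sermon history.
    """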
global HISTORY_ANSWER
keyStr = 'SERMON_TOPIC'
templates = SermonGeminiPromptTemplate()
llm = llmModelList[0] if len(llmModelList) > 0 else None
if HISTORY_ANSWER == '':
chain = updatePromptTemplate(
llm,
templates.getSermonPromptTemplates()['BUILD_INIT'],
[keyStr,'CANT_VERSICULOS','context']
)
    else:
        chain = updatePromptTemplate(
            llm,
            templates.getSermonPromptTemplates()['BUILD_EMPTY'],
            ['BIBLE_VERSICLE', 'context']
        )
        keyStr = 'BIBLE_VERSICLE'
global retriever
global embed_model
    if embed_model is None:
        llmBuilder = GeminiLLM()
        embed_model = llmBuilder.getEmbeddingsModel()
    if retriever is None:
        # Seed the local Chroma store with a placeholder document so a retriever
        # exists before any sermon text has been generated
        doc = Document(page_content="text", metadata={"source": "local"})
        vectorstore = Chroma.from_documents(
            documents=[doc],
            embedding=embed_model,
            persist_directory="chroma_db_dir_sermon",  # persisted locally on disk
            collection_name="sermon_lab_ai"
        )
        retriever = vectorstore.as_retriever(
            search_kwargs={"k": 3}
        )
answer = askQuestionInit(
'',
chain,
retriever,
topic = sermonTopic,
KEY = keyStr
)
    # Index the generated answer so it becomes retrievable context for later steps
    if answer != '':
        doc = Document(page_content=answer, metadata={"source": "local"})
        vectorstore = Chroma.from_documents(
            documents=[doc],
            embedding=embed_model,
            persist_directory="chroma_db_dir_sermon",  # persisted locally on disk
            collection_name="sermon_lab_ai"
        )
        retriever = vectorstore.as_retriever(
            search_kwargs={"k": 3}
        )
HISTORY_ANSWER = answer
return answer
####
#
####
def predictQuestionBuild(sermonTopic, llmModelList = []):
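    """Generate preparation questions for the sermon idea using the BUILD_QUESTION template."""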
llm = llmModelList[0] if len(llmModelList) > 0 else None
templates = SermonGeminiPromptTemplate()
chain = updatePromptTemplate(
llm,
templates.getSermonPromptTemplates()['BUILD_QUESTION'],
['SERMON_IDEA', 'context']
)
global retriever
global embed_model
    if embed_model is None:
        llmBuilder = GeminiLLM()
        embed_model = llmBuilder.getEmbeddingsModel()
    if retriever is None:
        # Seed the local Chroma store with a placeholder document
        doc = Document(page_content="text", metadata={"source": "local"})
        vectorstore = Chroma.from_documents(
            documents=[doc],
            embedding=embed_model,
            persist_directory="chroma_db_dir_sermon",  # persisted locally on disk
            collection_name="sermon_lab_ai"
        )
        retriever = vectorstore.as_retriever(
            search_kwargs={"k": 3}
        )
answer = askQuestionEx(
'',
chain,
retriever,
topic = sermonTopic,
KEY = 'SERMON_IDEA'
)
return answer
####
#
####
def predictDevotionBuild(sermonTopic, llmModelList = []):
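    """Generate devotional reflections for the sermon idea, using the accumulated
    sermon text (HISTORY_ANSWER) as the retrieval query."""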
templates = SermonGeminiPromptTemplate()
llm = llmModelList[0] if len(llmModelList) > 0 else None
chain = updatePromptTemplate(
llm,
        templates.getSermonPromptTemplates()['BUILD_REFLECTIONS'],
['SERMON_IDEA', 'context']
)
global retriever
global HISTORY_ANSWER
global embed_model
    if embed_model is None:
        llmBuilder = GeminiLLM()
        embed_model = llmBuilder.getEmbeddingsModel()
    if retriever is None:
        # Seed the local Chroma store with a placeholder document
        doc = Document(page_content="text", metadata={"source": "local"})
        vectorstore = Chroma.from_documents(
            documents=[doc],
            embedding=embed_model,
            persist_directory="chroma_db_dir_sermon",  # persisted locally on disk
            collection_name="sermon_lab_ai"
        )
        retriever = vectorstore.as_retriever(
            search_kwargs={"k": 3}
        )
answer = askQuestionEx(
HISTORY_ANSWER,
chain,
retriever,
topic = sermonTopic,
KEY = 'SERMON_IDEA'
)
return answer
####
#
####
def predictArgumentQuestionBuild(questionAnswer, llmModelList = []):
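    """Expand a question/answer pair with supporting information using the
    BUILD_ADD_INFORMATION_TO_QUEST_ANSWER template."""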
templates = SermonGeminiPromptTemplate()
llm = llmModelList[0] if len(llmModelList) > 0 else None
chain = updatePromptTemplate(
llm,
templates.getSermonPromptTemplates()['BUILD_ADD_INFORMATION_TO_QUEST_ANSWER'],
['QUESTION_ANSWER', 'context']
)
global retriever
global HISTORY_ANSWER
global embed_model
    if embed_model is None:
        llmBuilder = GeminiLLM()
        embed_model = llmBuilder.getEmbeddingsModel()
    if retriever is None:
        # Seed the local Chroma store with a placeholder document
        doc = Document(page_content="text", metadata={"source": "local"})
        vectorstore = Chroma.from_documents(
            documents=[doc],
            embedding=embed_model,
            persist_directory="chroma_db_dir_sermon",  # persisted locally on disk
            collection_name="sermon_lab_ai"
        )
        retriever = vectorstore.as_retriever(
            search_kwargs={"k": 3}
        )
answer = askQuestionEx(
"",
chain,
retriever,
topic = questionAnswer,
KEY = 'QUESTION_ANSWER'
)
return answer
# A utility function for answer generation
def askQuestion(
question,
_chain,
_retriever,
topic = 'el amor de Dios',
KEY = 'SERMON_TOPIC'
):
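    """Run the QA chain over the retrieved context, passing the sermon history as SERMON_CONTEXT."""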
    # Retrieve the chunks most relevant to the question from the RAG store
    context = _retriever.get_relevant_documents(question)
    global HISTORY_ANSWER
return (
_chain({
KEY: topic,
'SERMON_CONTEXT': HISTORY_ANSWER,
"input_documents": context,
"question": question
},
return_only_outputs = True)
)['output_text']
# A utility function for answer generation
def askQuestionEx(
question,
_chain,
_retriever,
topic = 'el amor de Dios',
KEY = 'SERMON_TOPIC'
):
context = _retriever.get_relevant_documents(question)
global HISTORY_ANSWER
return (
_chain({
KEY: topic,
"input_documents": context,
"question": question
},
return_only_outputs=True)
)['output_text']
# A utility function for answer generation
def askQuestionInit(
question,
_chain,
_retriever,
topic = 'el amor de Dios',
KEY = 'SERMON_TOPIC'
):
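    """Run the QA chain over the retrieved context; when KEY is SERMON_TOPIC,
    also request five verses via CANT_VERSICULOS."""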
    # Retrieve the chunks relevant to the question from the RAG store
context = _retriever.get_relevant_documents(question)
settings = {
KEY: topic,
"input_documents": context,
"question": question
}
if KEY == 'SERMON_TOPIC':
settings['CANT_VERSICULOS'] = 5
return (
_chain(
settings,
return_only_outputs=True)
)['output_text']
def downloadSermonFile(answer):
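    """Render the answer text to a PDF at FILE_PATH_NAME (pdfkit requires wkhtmltopdf),
    replacing any previous file."""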
if os.path.exists(FILE_PATH_NAME):
os.remove(FILE_PATH_NAME)
pdfkit.from_string(
answer,
FILE_PATH_NAME
)
return ""
def upload_file_ex(files):
    """Extract text from the uploaded PDF files and keep it as the sermon context."""
    global HISTORY_ANSWER
    file_paths = [file.name for file in files]
    file_content = 'Empty content'
    for filepath in file_paths:
        if os.path.exists(filepath):
            file_content = ''
            reader = PdfReader(filepath)
            for page in reader.pages:
                file_content += page.extract_text()
            # The text of the most recently processed file becomes the sermon context
            HISTORY_ANSWER = file_content
return [file_paths, file_content]
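
# Minimal usage sketch, assuming the Gemini credentials expected by llm_call are
# configured in the environment and wkhtmltopdf is installed for pdfkit; the
# Gradio app normally drives these functions instead of this block.
if __name__ == "__main__":
    opening = predictFromInit('el amor de Dios')
    print(opening)
    print(predictContemplando('EXÉGESIS'))
    downloadSermonFile(opening)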