from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
import streamlit as st
import tiktoken

from src.utils import load_config_values
from src.dev_llm import FakeLLM
from src.pdfParser import extract_text_from_pdf

# Load in model and pipeline configuration values
system_message, context_message, model_id = load_config_values(
    config_keys=[
        "system_message",
        "context_message",
        "model_id",
    ]
)

openai_tokeniser = tiktoken.encoding_for_model(model_id)

# TODO: Change this to reflect the prod model rather than the dev model
# Initialise a fake LLM to test out the full pipeline
tmp_llm = FakeLLM()

prompt = ChatPromptTemplate.from_template(
    template=context_message,
)

# Build the extraction chain. `uploaded_text` is assigned later by the file
# uploader; the lambda only looks it up when the chain is invoked, which
# happens after a PDF has been uploaded.
chain = (
    {
        "system_message": lambda x: system_message,
        "pdf_text": lambda x: uploaded_text,
        "data_to_extract": RunnablePassthrough(),
    }
    | prompt
    | tmp_llm
)

user_input_1 = st.text_input(label="input")
uploaded_doc = st.file_uploader(
    label="Upload PDF:\n",
    type="pdf",
)

if uploaded_doc is not None:
    uploaded_text = extract_text_from_pdf(uploaded_doc)
    st.text(chain.invoke(user_input_1))

    # Count tokens in the PDF text and both prompt messages to estimate the
    # total prompt size sent to the model
    text_length = len(openai_tokeniser.encode(uploaded_text))
    system_message_length = len(openai_tokeniser.encode(system_message))
    context_message_length = len(openai_tokeniser.encode(context_message))
    st.text(f"Total prompt tokens: {text_length + system_message_length + context_message_length}")
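
# A rough sketch of the production swap the TODO above refers to: replacing
# FakeLLM with a real chat model. This assumes an OpenAI-hosted model (hence
# the langchain_openai package and an OPENAI_API_KEY in the environment);
# the rest of the chain stays the same. Kept commented out so this dev
# script still runs against the fake LLM.
#
# from langchain_openai import ChatOpenAI
#
# prod_llm = ChatOpenAI(model=model_id, temperature=0)
# chain = (
#     {
#         "system_message": lambda x: system_message,
#         "pdf_text": lambda x: uploaded_text,
#         "data_to_extract": RunnablePassthrough(),
#     }
#     | prompt
#     | prod_llm
# )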