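"""Streamlit app for extracting data from an uploaded PDF.

Wires a prompt template and a stub LLM into a LangChain pipeline; no real
model is called yet (see the TODO below).
"""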
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
import streamlit as st
import tiktoken

from src.utils import load_config_values
from src.dev_llm import FakeLLM
from src.pdfParser import extract_text_from_pdf
# Load in model and pipeline configuration values
system_message, context_message, model_id = load_config_values(
    config_keys=[
        "system_message",
        "context_message",
        "model_id",
    ]
)
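# Tokeniser matching the configured model, used below to estimate prompt size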
openai_tokeniser = tiktoken.encoding_for_model(model_id)
# TODO: Change this to reflect prod model rather than dev models
# Initialise fake values and a fake LLM to test out the full pipeline
tmp_llm = FakeLLM()
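# One possible prod swap, assuming the langchain-openai package is adopted:
#   from langchain_openai import ChatOpenAI
#   tmp_llm = ChatOpenAI(model=model_id)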
prompt = ChatPromptTemplate.from_template(
    template=context_message,
)
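# LCEL pipeline: the dict fills the prompt's variables, then the rendered
# prompt is piped into the (fake) LLM. The invoke() input flows through
# RunnablePassthrough into "data_to_extract"; "pdf_text" is resolved lazily
# via the lambda, so `uploaded_text` only needs to exist by invocation time.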
chain = (
    {
        "system_message": lambda x: system_message,
        "pdf_text": lambda x: uploaded_text,
        "data_to_extract": RunnablePassthrough(),
    }
    | prompt
    | tmp_llm
)
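# Streamlit UI: a free-text field for what to extract, plus a PDF uploader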
user_input_1 = st.text_input(label="input")
uploaded_doc = st.file_uploader(
    label="Upload PDF:\n",
    type="pdf",
)
if uploaded_doc is not None:
    uploaded_text = extract_text_from_pdf(uploaded_doc)
    st.text(chain.invoke(user_input_1))
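    # Rough prompt-size estimate: token counts for the PDF text and both
    # prompt templates (the free-text query itself is not counted here)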
    text_length = len(openai_tokeniser.encode(uploaded_text))
    system_message_length = len(openai_tokeniser.encode(system_message))
    context_message_length = len(openai_tokeniser.encode(context_message))
    st.text(text_length + system_message_length + context_message_length)