from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
import streamlit as st
import tiktoken

from src.utils import load_config_values
from src.dev_llm import FakeLLM
from src.pdfParser import extract_text_from_pdf
# Load in model and pipeline configuration values
system_message, context_message, model_id = load_config_values(
    config_keys=[
        "system_message",
        "context_message",
        "model_id",
    ]
)
# Tokeniser matching the configured OpenAI model, used below to estimate prompt size
openai_tokeniser = tiktoken.encoding_for_model(model_id)
# TODO: Change this to reflect prod model rather than dev models
# Initialise fake values and a fake LLM to test out the full pipeline
tmp_llm = FakeLLM()
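# A minimal sketch of the production swap the TODO above refers to: replace the
# FakeLLM with a real chat model. The langchain-openai package and ChatOpenAI
# class are assumptions here, not part of this repo's code.
# from langchain_openai import ChatOpenAI
# tmp_llm = ChatOpenAI(model=model_id)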
prompt = ChatPromptTemplate.from_template(
    template=context_message,
)
# Each dict key feeds a prompt variable: the two messages are fixed, "pdf_text"
# is read lazily from the module-level `uploaded_text` set in the upload handler
# below, and "data_to_extract" passes the invoke() input straight through.
chain = (
    {
        "system_message": lambda x: system_message,
        "pdf_text": lambda x: uploaded_text,
        "data_to_extract": RunnablePassthrough(),
    }
    | prompt
    | tmp_llm
)
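# The chain is only invoked once a PDF has been uploaded (see below). For a
# quick smoke test outside the Streamlit UI one could set `uploaded_text` to
# any string and call e.g. chain.invoke("fields to extract") — the argument
# here is an illustrative placeholder, not a value from this repo.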
# Free-text description of what to extract; passed to the chain as "data_to_extract"
user_input_1 = st.text_input(label="input")
uploaded_doc = st.file_uploader(
    label="Upload PDF:\n",
    type=".pdf"
)
if uploaded_doc is not None:
    uploaded_text = extract_text_from_pdf(uploaded_doc)
    st.text(chain.invoke(user_input_1))

    # Estimate the total prompt size in tokens: extracted document text
    # plus the system and context messages
    text_length = len(openai_tokeniser.encode(uploaded_text))
    system_message_length = len(openai_tokeniser.encode(system_message))
    context_message_length = len(openai_tokeniser.encode(context_message))
    st.text(text_length + system_message_length + context_message_length)
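    # One possible use of the count above (a sketch, not in the original code):
    # warn when the prompt is likely to exceed the model's context window.
    # The 128_000 limit is an assumed placeholder and would belong in config.
    # total_tokens = text_length + system_message_length + context_message_length
    # if total_tokens > 128_000:
    #     st.warning("Document may exceed the model's context window.")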