# docverifyrag / app.py
# Carlos Salgado
# fallback on pypdf, trim flake, minor ux
# e39bb0b
import io
import os
import streamlit as st
import tempfile
from scripts import analyze_metadata, generate_metadata, ingest, MODEL_NAME
# Page header. st.title already renders its text as the page's top-level
# heading, so the text must not carry its own Markdown '#' marker (it would be
# displayed literally). st.write, by contrast, renders the string as Markdown,
# so the '##' there produces a second-level heading.
st.title('DocVerifyRAG')
st.write('## Anomaly detection for BIM document metadata')
def suggest_metadata(file_upload):
    """Ingest an uploaded file and display the metadata suggested for it.

    The upload's bytes are copied to a temporary file because ``ingest`` is
    called with a filesystem path. The ingested documents are then handed to
    ``generate_metadata`` and the result is written to the page.

    Args:
        file_upload: The uploaded file object. It must provide a ``read()``
            method returning the file's bytes (the caller passes the value
            returned by ``st.file_uploader``).
    """
    # Read from the argument rather than a module-level global, so the
    # function processes exactly the upload the caller handed in.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(file_upload.read())
        st.write(f'Created temporary file {tmp.name}')

    try:
        st.write('## Ingesting Unstructured file')
        docs = ingest(tmp.name)
        print(f'Ingested {tmp.name}')
        metadata = generate_metadata(docs)
    finally:
        # delete=False keeps the file on disk after the ``with`` block so it
        # can be reopened by path; remove it explicitly so repeated queries
        # do not pile up temporary files.
        try:
            os.remove(tmp.name)
        except OSError:
            # Best-effort cleanup: a failure to delete the temporary file
            # must not hide the result (or the error) of the ingestion.
            pass

    st.write('## Querying Together.ai API')
    st.write(f'### Suggested Metadata Generated by {MODEL_NAME}')
    st.write(f'#### {metadata}')
# Form for checking user-supplied metadata: the three comma-separated fields
# are parsed and passed to analyze_metadata, whose result is shown below.
with st.form('analyze_form'):
    st.write('Enter your file metadata in the following schema:')
    # The placeholder must be a string; passing the builtin ``str`` type
    # would show its repr instead of a helpful hint.
    text = st.text_input(
        label='Filename, Description, Discipline',
        value="",
        placeholder='e.g. A-101.pdf, Ground floor plan, Architecture',
    )
    submitted = st.form_submit_button('Submit')
    if submitted:
        # Validate before unpacking: an empty submit or a wrong number of
        # commas would otherwise raise ValueError and crash the page.
        fields = [field.strip() for field in text.split(',')]
        if len(fields) != 3 or not all(fields):
            st.error('Please enter exactly three comma-separated values: '
                     'Filename, Description, Discipline')
        else:
            filename, description, discipline = fields
            st.write('## Analyzing with Vectara + together.ai')
            analysis = analyze_metadata(filename, description, discipline)
            st.write(analysis)
# Metadata generation section: upload a PDF, then query for suggestions.
st.write('## Generate metadata?')
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

# Short-circuit evaluation keeps the button hidden until a file is uploaded;
# st.button returns True only on the run triggered by the click, so no manual
# reset of the flag is needed afterwards.
if uploaded_file is not None and st.button('Query API'):
    suggest_metadata(uploaded_file)