# Page residue from the hosting site (not part of app.py):
#   DrishtiSharma's picture
#   Update app.py
#   99813f5 verified
# TODO: include non-patent literature (NPL) - arXiv, MDPI, NCBI, general web articles - and remove patent numbers that belong to the same patent family.
# TODO: ranking should be based on the patent's title, abstract, claims, and summary rather than on a particular embodiment.
import os
import streamlit as st
from main import generate_search_terms, search_on_google_patents, check_similarity_of_patents
# Streamlit app starts here
st.title("Patent Similarity Search App (RAG + GOOGLE PATENTS SEARCH)")

st.subheader("User Input")
# The hint is supplied as a placeholder rather than as the field's value, so an
# untouched field yields "" instead of submitting the hint text as patent data.
user_input = st.text_area(
    "Enter Patent Data",
    placeholder="Paste your patent details here...",
)

# Sidebar for user input
# NOTE(review): the indentation of this file was lost; the three widgets below
# are assumed to be the body of the sidebar block - confirm.
with st.sidebar:
    # Integer `value`/`min_value` arguments are used so the widgets hand back
    # whole numbers; top_k_patents is later used as a slice bound.
    number_of_generated_search_terms = st.number_input(
        "Number of Search Terms to Generate", value=5, min_value=1
    )
    top_k_patents = st.number_input(
        "Top K Patents to Search", value=10, min_value=1
    )
    # NOTE(review): model_name is collected here but is not passed to any call
    # in the visible part of this file - confirm whether it should be.
    model_name = st.text_input("OpenAI Model Name", "gpt-4-0125-preview")
# Fetch API keys from environment variables
openai_key = os.getenv("OPENAI_API_KEY", "").strip()
serpapi_key = os.getenv("SERP_API_KEY", "").strip()

# Allow user to input missing keys
# type="password" masks the secret so it is not shown in clear text on screen.
# NOTE(review): in the visible code a key typed here is only used for the
# "is it missing?" check; nothing forwards it to the helpers imported from
# `main` - confirm how `main` obtains its credentials.
if not openai_key:
    openai_key = st.text_input(
        "Missing: OpenAI API Key (OPENAI_API_KEY)", "", type="password"
    ).strip()
if not serpapi_key:
    serpapi_key = st.text_input(
        "Missing: SERP API Key (SERP_API_KEY)", "", type="password"
    ).strip()
def _dedupe_patents(patents):
    """Return *patents* without repeated ``patentNumber`` entries, order kept.

    The same patent can be returned for several search terms; keeping only the
    first occurrence avoids scoring and displaying it more than once. Entries
    that carry no ``patentNumber`` cannot be compared and are kept as they are.
    """
    seen_numbers = set()
    unique = []
    for patent in patents:
        number = patent.get("patentNumber")
        if number is not None:
            if number in seen_numbers:
                continue
            seen_numbers.add(number)
        unique.append(patent)
    return unique


def _rank_patents(patents, scored_patents):
    """Attach similarity scores and Google Patents URLs, best match first.

    *scored_patents* is the ``listOfPatents`` value returned by the similarity
    check; each entry is read for ``patentNumber`` and ``similarityScore`` (a
    missing key raises ``KeyError``). Patents that received no score are sorted
    as if their score were 0. The dicts in *patents* are updated in place and a
    new sorted list is returned.
    """
    score_by_number = {
        scored["patentNumber"]: scored["similarityScore"]
        for scored in scored_patents
    }
    for patent in patents:
        number = patent.get("patentNumber")
        if number is None:
            continue
        patent["patentGoogleUrl"] = f"https://patents.google.com/patent/{number}"
        if number in score_by_number:
            patent["similarityScore"] = score_by_number[number]
    return sorted(
        patents, key=lambda p: p.get("similarityScore", 0), reverse=True
    )


# Check which API keys are missing and notify the user
missing_keys = []
if not openai_key:
    missing_keys.append("OpenAI API Key")
if not serpapi_key:
    missing_keys.append("SERP API Key")

if missing_keys:
    st.error(f"Unable to fetch {', '.join(missing_keys)}")
# Main content
elif st.button("Find Similar Patents"):
    if not user_input.strip():
        # Nothing to search for; avoid calling the helpers with blank input.
        st.warning("Please enter patent data before searching.")
    else:
        try:
            # number_input can return a float depending on its arguments; a
            # slice bound must be an int.
            top_k = int(top_k_patents)

            # Generate search terms
            with st.spinner('Generating search terms...'):
                generated_search_terms = generate_search_terms(
                    user_input, number_of_generated_search_terms
                )
            st.success("Search terms generated!")
            st.write("Generated search terms: ", generated_search_terms)

            # Search patents using generated terms
            with st.spinner('Searching for patents...'):
                search_terms_patents = search_on_google_patents(generated_search_terms)
            st.success("Patents search completed!")

            st.subheader("Top Patents Found for Search Terms")
            patents_list = []
            for search_term, patents in search_terms_patents.items():
                if not patents:
                    st.write(f"No patents found for search term: {search_term}")
                    continue
                top_patents = patents[:top_k]
                st.write(f"Top {top_k} patents found for search term: {search_term}")
                st.json([patent['patentTitle'] for patent in top_patents])
                patents_list.extend(top_patents)

            # A patent found under several search terms is scored and shown once.
            patents_list = _dedupe_patents(patents_list)

            if not patents_list:
                # Skip the similarity call when there is nothing to compare.
                st.info("No patents were found for any of the search terms.")
            else:
                # Check similarity of patents
                with st.spinner('Checking similarities of patents...'):
                    similarities = check_similarity_of_patents(user_input, patents_list)
                st.success("Similarity check completed!")

                # Enrich patents with similarity scores and sort them
                patents_list = _rank_patents(
                    patents_list, similarities['listOfPatents']
                )

                # Display top similar patents
                st.subheader("Top Similar Patents")
                st.json(patents_list)
        except KeyError as e:
            st.error(f"Key error encountered: {e}")
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user instead
            # of letting the script crash.
            st.error(f"An error occurred: {e}")