"""Streamlit front end for the patent similarity search.

The page collects a patent description, asks ``main.generate_search_terms``
for search terms, passes them to ``main.search_on_google_patents`` and then
shows the hits ordered by the score returned from
``main.check_similarity_of_patents``.
"""

# TODO: include non-patent literature (NPL) - arXiv, MDPI, NCBI, general web
#       articles - and remove patent numbers that belong to the same family.
# TODO: ranking should be based on the title, abstract, claims and summary of
#       the patent rather than on a particular embodiment.

import os

import streamlit as st

from main import (
    check_similarity_of_patents,
    generate_search_terms,
    search_on_google_patents,
)

# Default text shown in the input box; it must never be searched as real input.
INPUT_PLACEHOLDER = "Paste your patent details here..."


def _dedupe_patents(patents):
    """Return *patents* without repeated ``patentNumber`` values, order kept.

    The same patent is often returned for several search terms. Keeping only
    the first occurrence avoids scoring and displaying it more than once.
    Entries without a ``patentNumber`` cannot be compared and are kept as-is.
    """
    seen = set()
    unique = []
    for patent in patents:
        number = patent.get("patentNumber")
        if number is not None:
            if number in seen:
                continue
            seen.add(number)
        unique.append(patent)
    return unique


def _attach_scores(patents, scored_patents):
    """Copy similarity scores onto the matching *patents* and rank them.

    Each entry of *scored_patents* must provide ``patentNumber`` and
    ``similarityScore``; a missing key raises ``KeyError``. Matching patents
    are updated in place with the score and a Google Patents URL. Patents that
    received no score are ranked as if their score were 0.

    Returns a new list sorted by descending ``similarityScore``.
    """
    by_number = {p["patentNumber"]: p for p in patents if "patentNumber" in p}
    for scored in scored_patents:
        target = by_number.get(scored["patentNumber"])
        if target is None:
            # Score for a patent that was never submitted; nothing to enrich.
            continue
        target["similarityScore"] = scored["similarityScore"]
        target["patentGoogleUrl"] = (
            f"https://patents.google.com/patent/{target['patentNumber']}"
        )
    return sorted(patents, key=lambda p: p.get("similarityScore", 0), reverse=True)


def _run_search(patent_text, number_of_terms, top_k):
    """Run the search pipeline for *patent_text* and render each stage.

    *number_of_terms* is forwarded to ``generate_search_terms``; *top_k* caps
    how many patents per search term are listed and sent to the similarity
    check. Exceptions from the ``main`` helpers propagate to the caller.
    """
    with st.spinner('Generating search terms...'):
        generated_search_terms = generate_search_terms(patent_text, number_of_terms)
    st.success("Search terms generated!")
    st.write("Generated search terms: ", generated_search_terms)

    with st.spinner('Searching for patents...'):
        search_terms_patents = search_on_google_patents(generated_search_terms)
    st.success("Patents search completed!")

    st.subheader("Top Patents Found for Search Terms")
    candidates = []
    for search_term, patents in search_terms_patents.items():
        if not patents:
            st.write(f"No patents found for search term: {search_term}")
            continue
        top_patents = patents[:top_k]
        st.write(f"Top {top_k} patents found for search term: {search_term}")
        st.json([patent['patentTitle'] for patent in top_patents])
        candidates.extend(top_patents)

    patents_list = _dedupe_patents(candidates)
    if not patents_list:
        # Nothing to compare against; skip the similarity call entirely.
        st.warning("No patents were found for any of the search terms.")
        return

    with st.spinner('Checking similarities of patents...'):
        similarities = check_similarity_of_patents(patent_text, patents_list)
    st.success("Similarity check completed!")

    ranked_patents = _attach_scores(patents_list, similarities['listOfPatents'])

    st.subheader("Top Similar Patents")
    st.json(ranked_patents)


# Streamlit app starts here
st.title("Patent Similarity Search App (RAG + GOOGLE PATENTS SEARCH)")

st.subheader("User Input")
user_input = st.text_area("Enter Patent Data", INPUT_PLACEHOLDER)

# Sidebar for user input
with st.sidebar:
    number_of_generated_search_terms = st.number_input(
        "Number of Search Terms to Generate", value=5, min_value=1
    )
    top_k_patents = st.number_input("Top K Patents to Search", value=10, min_value=1)
    # NOTE(review): model_name is collected but never passed to any helper in
    # this script - wire it through once main's interface is confirmed.
    model_name = st.text_input("OpenAI Model Name", "gpt-4-0125-preview")

    # Fetch API keys from environment variables
    openai_key = os.getenv("OPENAI_API_KEY", "")
    serpapi_key = os.getenv("SERP_API_KEY", "")

    # Allow the user to supply missing keys. The helpers in main receive no key
    # argument, so a typed-in key is exported to the environment.
    # NOTE(review): this assumes main reads the variables at call time - confirm.
    # os.environ is process-wide, so the key is shared by every session served
    # by this process.
    if not openai_key:
        openai_key = st.text_input(
            "Missing: OpenAI API Key (OPENAI_API_KEY)", "", type="password"
        )
        if openai_key:
            os.environ["OPENAI_API_KEY"] = openai_key
    if not serpapi_key:
        serpapi_key = st.text_input(
            "Missing: SERP API Key (SERP_API_KEY)", "", type="password"
        )
        if serpapi_key:
            os.environ["SERP_API_KEY"] = serpapi_key

# Check which API keys are missing and notify the user
missing_keys = [
    label
    for label, value in (("OpenAI API Key", openai_key), ("SERP API Key", serpapi_key))
    if not value
]

if missing_keys:
    st.error(f"Unable to fetch {', '.join(missing_keys)}")
elif st.button("Find Similar Patents"):
    if user_input.strip() in ("", INPUT_PLACEHOLDER):
        # Do not spend API calls on a blank box or the untouched placeholder.
        st.warning("Please enter your patent details before searching.")
    else:
        # UI boundary: report any failure on the page instead of crashing.
        try:
            _run_search(user_input, number_of_generated_search_terms, top_k_patents)
        except KeyError as e:
            st.error(f"Key error encountered: {e}")
        except Exception as e:
            st.error(f"An error occurred: {e}")