|
import os |
|
from datetime import datetime |
|
import streamlit as st |
|
from patentwiz import preprocess_data, qa_agent |
|
|
|
|
|
# Credential guard: read the OpenAI key from the environment and, when it is
# missing or empty, report the problem in the page and stop this script run.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    st.error(
        "OPENAI_API_KEY not found! "
        "Please set it in the environment variables or Hugging Face Secrets."
    )
    st.stop()
|
|
|
# Instruction text passed as the first argument to qa_agent.call_QA_to_json.
# It asks the model to list every physical measurement found in a patent as
# objects inside a top-level "Content" array, and includes one worked example.
# The wording is kept verbatim; only stray table-pipe characters and the blank
# line injected after every line have been removed from the literal.
PROMPT = """
Task: Carefully review the given patent text and extract as much physical measurements information such as length/distance, mass/weight, time, temperature, Volume, area, speed, pressure, energy, power, electric current
and voltage, frequency, force, acceleration, density, resistivity, magnetic field strength, and luminous intensity as much as possible.
We are particularly interested in physical measurements including substance that was measured, Value of the measurement, and Unit of the measurement, and measurement type mentioned in the text.
For each measurement, please provide the following details:
- The substance that was measured. (substance)
- The specific value or range that was measured. (Measured Value)
- The unit of the measurement, if provided. (Unit)
- The type of measurement being conducted (e.g., diameter, size, etc.)
Format your response in a structured JSON-like format, as follows:
{"Content": [
{
"Measurement_substance": "substance",
"Measured_value": "value",
"Measured_unit": "unit",
"measurement_type": "type"
},
// ... additional measurements, if present
]
}
If multiple measurements are present in the text, each should be listed as a separate object within the "Content" array.
Example: If the text includes the sentence, "The resulting BaCO3 had a crystallite size of between about 20 and 40 nm", the output should be:
{"Content": [
{
"Measurement_substance": "BaCO3",
"Measured_value": "between about 20 and 40",
"Measured_unit": "nm",
"measurement_type": "crystallite size"
}
]
}
Try to provide as complete and accurate information as possible. Print only the formatted JSON response.
"""
|
|
|
|
|
# Page heading, followed by a short description of what the app does.
st.title("Technical Measurements Extractor for Patents")
st.write(
    "Analyze patents to extract physical measurements "
    "such as length, mass, time, and more. "
    "Provide a date to download patents, "
    "and analyze them using GPT models."
)
|
|
|
|
|
# Input form. The four values collected here are read by the
# "Analyze Patents" button handler further down the script.
st.header("Enter Details for Patent Analysis")

# Publication date as free text; it is parsed with "%Y-%m-%d" on submit.
user_date_input = st.text_input(
    "Enter a date in the format 'YYYY-MM-DD':",
    value="2024-06-16",
)

# Upper bound on how many of the downloaded patents are sent for analysis.
num_patents_to_analyze = st.number_input(
    "Number of patents to analyze:",
    min_value=1,
    value=1,
    step=1,
    help="Specify how many patents you want to analyze.",
)

# Model name handed through to the QA agent.
model_choice = st.selectbox(
    "Select a model for analysis:",
    ["gpt-3.5-turbo", "gpt-4"],
    help="Choose the OpenAI GPT model for the analysis.",
)

# Flag forwarded to both the download step and the QA agent.
logging_enabled = st.checkbox(
    "Enable logging?",
    value=False,
    help="Toggle logging for debugging purposes.",
)
|
|
|
|
|
# Button handler: validate the date, download the patents published on that
# date, run the QA agent on the first N of them, then show cost and results.
if st.button("Analyze Patents"):
    # Tolerate accidental surrounding whitespace in the text box.
    date_text = user_date_input.strip()
    input_date = None

    if not date_text:
        st.error("Please enter a valid date!")
    else:
        # Parse the date in its own try block so that only a malformed date is
        # reported as "Invalid date format". A ValueError raised later by the
        # download or analysis steps must not be mislabelled as a date problem.
        try:
            input_date = datetime.strptime(date_text, "%Y-%m-%d")
        except ValueError as ve:
            st.error(f"Invalid date format: {ve}")

    if input_date is not None:
        year, month, day = input_date.year, input_date.month, input_date.day

        # Top-level boundary for the pipeline: any failure in the project
        # helpers is surfaced in the page instead of crashing the app.
        try:
            with st.spinner("Downloading and extracting patents..."):
                saved_patent_names = preprocess_data.parse_and_save_patents(
                    year, month, day, logging_enabled
                )

            if not saved_patent_names:
                st.error("No patents found for the given date.")
                st.stop()

            st.success(f"{len(saved_patent_names)} patents found and processed!")

            # Analyze the first N saved patents (a prefix, not a random
            # sample), capped at the number actually available.
            analysis_count = len(saved_patent_names[:num_patents_to_analyze])
            total_cost = 0
            results = []

            st.write("Starting patent analysis...")
            for patent_index in range(analysis_count):
                # The agent receives the full list plus an index; presumably
                # it picks the patent itself - confirm against
                # patentwiz.qa_agent.
                cost, output = qa_agent.call_QA_to_json(
                    PROMPT,
                    year,
                    month,
                    day,
                    saved_patent_names,
                    patent_index,
                    logging_enabled,
                    model_choice,
                )
                total_cost += cost
                results.append(output)

            st.write(f"**Total Cost:** ${total_cost:.4f}")
            st.write("### Analysis Results:")
            for idx, result in enumerate(results):
                st.subheader(f"Patent {idx + 1}")
                st.json(result)
        except Exception as e:
            st.error(f"An unexpected error occurred: {e}")