Spaces:

DrishtiSharma
/

technical-measurements-extractor-for-patents

Sleeping

App Files Files Community

technical-measurements-extractor-for-patents / interim.py

DrishtiSharma

Update interim.py

f29ee59 verified 14 days ago

raw

history blame contribute delete

4.86 kB

	import os
	from datetime import datetime
	import streamlit as st
	from patentwiz import preprocess_data, qa_agent

	# Fail fast when the OpenAI credential is missing.
	# os.getenv returns None for an unset variable; `not api_key` also rejects an
	# empty string, so both cases show the error and halt the script here.
	# NOTE(review): api_key is not passed on explicitly in this file; presumably
	# patentwiz / the OpenAI client read it from the environment -- confirm.
	api_key = os.getenv("OPENAI_API_KEY")
	if not api_key:
	    st.error("OPENAI_API_KEY not found! Please set it in the environment variables or Hugging Face Secrets.")
	    # Stop this script run so none of the widgets below are rendered.
	    st.stop()

	# Instruction text for the language model; it is passed as the first argument
	# to qa_agent.call_QA_to_json for every analyzed patent.
	# It asks for a JSON-like reply of the form {"Content": [...]} where each item
	# has the keys Measurement_substance, Measured_value, Measured_unit and
	# measurement_type, and includes one worked example (BaCO3 crystallite size).
	# NOTE(review): the "// ... additional measurements" line in the template is
	# illustrative only and is not valid JSON; the text is runtime data, so it is
	# left untouched here.
	PROMPT = """
	Task: Carefully review the given patent text and extract as much physical measurements information such as length/distance, mass/weight, time, temperature, Volume, area, speed, pressure, energy, power, electric current
	and voltage, frequency, force, acceleration, density, resistivity, magnetic field strength, and luminous intensity as much as possible.
	We are particularly interested in physical measurements including substance that was measured, Value of the measurement, and Unit of the measurement, and measurement type mentioned in the text.
	For each measurement, please provide the following details:
	- The substance that was measured. (substance)
	- The specific value or range that was measured. (Measured Value)
	- The unit of the measurement, if provided. (Unit)
	- The type of measurement being conducted (e.g., diameter, size, etc.)
	Format your response in a structured JSON-like format, as follows:
	{"Content": [
	{
	"Measurement_substance": "substance",
	"Measured_value": "value",
	"Measured_unit": "unit",
	"measurement_type": "type"
	},
	// ... additional measurements, if present
	]
	}
	If multiple measurements are present in the text, each should be listed as a separate object within the "Content" array.
	Example: If the text includes the sentence, "The resulting BaCO3 had a crystallite size of between about 20 and 40 nm", the output should be:
	{"Content": [
	{
	"Measurement_substance": "BaCO3",
	"Measured_value": "between about 20 and 40",
	"Measured_unit": "nm",
	"measurement_type": "crystallite size"
	}
	]
	}
	Try to provide as complete and accurate information as possible. Print only the formatted JSON response.
	"""

	# Page heading followed by a short description of what the app does.
	st.title("Technical Measurements Extractor for Patents")
	intro_text = (
	    "Analyze patents to extract physical measurements such as length, mass, time, and more. "
	    "Provide a date to download patents, and analyze them using GPT models."
	)
	st.write(intro_text)

	# Input widgets: their values are read later when the analysis button is pressed.
	st.header("Enter Details for Patent Analysis")

	# Free-text date; it is validated against YYYY-MM-DD only when the analysis runs.
	user_date_input = st.text_input(
	    label="Enter a date in the format 'YYYY-MM-DD':",
	    value="2024-06-16",
	)

	# Integer bounds/step, so the widget starts at 1 and cannot go below 1.
	num_patents_to_analyze = st.number_input(
	    label="Number of patents to analyze:",
	    min_value=1,
	    value=1,
	    step=1,
	    help="Specify how many patents you want to analyze.",
	)

	# Model name forwarded unchanged to the analysis call.
	available_models = ["gpt-3.5-turbo", "gpt-4"]
	model_choice = st.selectbox(
	    label="Select a model for analysis:",
	    options=available_models,
	    help="Choose the OpenAI GPT model for the analysis.",
	)

	# Off by default; forwarded to both the download and the analysis steps.
	logging_enabled = st.checkbox(
	    label="Enable logging?",
	    value=False,
	    help="Toggle logging for debugging purposes.",
	)

	# Run Analysis Button
	def _parse_input_date(raw_date):
	    """Parse the user's date text, reporting problems in the UI.

	    Args:
	        raw_date: Text typed by the user, expected as ``YYYY-MM-DD``.

	    Returns:
	        The parsed ``datetime``, or ``None`` after an ``st.error`` message
	        was shown because the text was empty or malformed.
	    """
	    cleaned = (raw_date or "").strip()
	    if not cleaned:
	        st.error("Please enter a valid date!")
	        return None
	    # Only strptime is guarded, so a ValueError raised later by the download
	    # or analysis code is never misreported as a date-format problem.
	    try:
	        return datetime.strptime(cleaned, "%Y-%m-%d")
	    except ValueError as ve:
	        st.error(f"Invalid date format: {ve}")
	        return None


	def _analyze_patents(input_date):
	    """Download the patents for ``input_date``, analyze them and show results.

	    Reads the module-level ``logging_enabled``, ``num_patents_to_analyze``,
	    ``model_choice`` and ``PROMPT`` values.

	    Args:
	        input_date: ``datetime`` whose year, month and day select the patents.
	    """
	    year, month, day = input_date.year, input_date.month, input_date.day

	    # Step 1: Download and preprocess patents
	    with st.spinner("Downloading and extracting patents..."):
	        saved_patent_names = preprocess_data.parse_and_save_patents(
	            year, month, day, logging_enabled
	        )
	    if not saved_patent_names:
	        st.error("No patents found for the given date.")
	        # NOTE(review): st.stop() is expected to propagate past the caller's
	        # `except Exception` handler -- confirm for the installed Streamlit.
	        st.stop()
	    st.success(f"{len(saved_patent_names)} patents found and processed!")

	    # Step 2: Analyze patents using GPT
	    # The first N saved patents are analyzed (a prefix, not a random sample).
	    selected_patents = saved_patent_names[:int(num_patents_to_analyze)]
	    total_cost = 0
	    results = []

	    st.write("Starting patent analysis...")
	    # call_QA_to_json receives the full list plus an index, so only the
	    # position of each selected patent is needed here.
	    for patent_index in range(len(selected_patents)):
	        cost, output = qa_agent.call_QA_to_json(
	            PROMPT,
	            year,
	            month,
	            day,
	            saved_patent_names,
	            patent_index,
	            logging_enabled,
	            model_choice,
	        )
	        total_cost += cost
	        results.append(output)

	    # Step 3: Display results
	    st.write(f"Total Cost: ${total_cost:.4f}")
	    st.write("### Analysis Results:")
	    for idx, result in enumerate(results):
	        st.subheader(f"Patent {idx + 1}")
	        st.json(result)


	if st.button("Analyze Patents"):
	    input_date = _parse_input_date(user_date_input)
	    if input_date is not None:
	        # Top-level UI boundary: show any failure to the user instead of a
	        # raw traceback.
	        try:
	            _analyze_patents(input_date)
	        except Exception as e:
	            st.error(f"An unexpected error occurred: {e}")