import streamlit as st
import sounddevice as sd
import numpy as np
import wave
import whisper
import os
import openai


def chunk_text(text, chunk_size=2000):
    """Split text into fixed-size character chunks so long inputs can be sent
    to the chat model in pieces that fit its context window."""
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end
    return chunks
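
# Illustrative example (not executed): with the default chunk_size=2000,
# chunk_text("a" * 4500) returns three chunks of lengths 2000, 2000, and 500.
# Chunking is by raw character count, so a boundary may fall mid-word or
# mid-sentence.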

if 'learning_objectives' not in st.session_state:
    st.session_state.learning_objectives = ""

st.title("Patent Claims Extraction")

api_key = st.text_input("Enter your OpenAI API Key:", type="password")

record_audio = st.checkbox("Record Audio")
if record_audio:
    # Keep the captured frames and the open stream in session state so they
    # survive Streamlit's top-to-bottom reruns between widget interactions.
    if "audio_frames" not in st.session_state:
        st.session_state.audio_frames = []
    audio_frames = st.session_state.audio_frames

    def audio_callback(indata, frames, time, status):
        # Runs on sounddevice's audio thread; only touch the plain list here.
        if status:
            print(status, flush=True)
        audio_frames.append(indata.copy())

    if "input_stream" not in st.session_state:
        # Capture 16-bit mono at 44.1 kHz so the raw samples match the WAV
        # header written below (1 channel, sampwidth=2, 44100 Hz).
        st.session_state.input_stream = sd.InputStream(
            samplerate=44100, channels=1, dtype="int16", callback=audio_callback
        )
        st.session_state.input_stream.start()

    st.text("Recording audio. Click 'Stop Recording' when finished.")

    if st.button("Stop Recording"):
        st.session_state.input_stream.stop()
        st.session_state.input_stream.close()
        del st.session_state.input_stream
        st.success("Recording stopped")

        if audio_frames:
            audio_data = np.concatenate(audio_frames, axis=0)
            with wave.open("recorded_audio.wav", "wb") as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 2 bytes per sample = 16-bit audio
                wf.setframerate(44100)
                wf.writeframes(audio_data.tobytes())
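
# Optional sanity check (a sketch added here, not part of the original flow):
# verify the WAV header matches what the recorder above writes before the
# file is handed to whisper below.
if os.path.exists("recorded_audio.wav"):
    with wave.open("recorded_audio.wav", "rb") as check:
        if (check.getnchannels(), check.getsampwidth(), check.getframerate()) != (1, 2, 44100):
            st.warning("recorded_audio.wav is not 16-bit mono 44.1 kHz as expected.")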

if os.path.exists("recorded_audio.wav"):
    # Note: load_model reloads the weights on every rerun; consider wrapping
    # this in st.cache_resource if transcription feels slow.
    model = whisper.load_model("base")

    st.audio("recorded_audio.wav", format="audio/wav")
    st.info("Transcribing...")
    result = model.transcribe("recorded_audio.wav")
    transcript = result['text']
    st.success("Transcription complete")

    with st.expander("See transcript"):
        st.markdown(transcript)
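
# whisper's transcribe() returns a dict: result['text'] is the full
# transcript, and result['segments'] carries per-segment text and timing if
# timestamps are ever needed.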

model_choice = st.selectbox(
    "Select the model you want to use:",
    ["gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo", "gpt-4-0314", "gpt-4-0613", "gpt-4"]
)

context = (
    "You are a patent claims identifier and extractor. You will read freeform "
    "text and identify any claims contained therein that may be patentable. "
    "You identify, extract, and print such claims, and briefly explain why "
    "each claim is patentable."
)
userinput = st.text_input("Input Text:", "Freeform text here!")

if api_key:
    openai.api_key = api_key

st.write("### Patentable Claims:")

claims_extraction = ""

learning_status_placeholder = st.empty()
disable_button_bool = False

# The button key must differ from the 'claims_extraction' session-state key
# written below: Streamlit forbids assigning to a key owned by a widget.
if userinput and api_key and st.button("Extract Claims", key="extract_claims_button", disabled=disable_button_bool):
    # Split the input so each request stays within the model's context window.
    input_chunks = chunk_text(userinput)

    all_extracted_claims = ""
    for i, chunk in enumerate(input_chunks, start=1):
        learning_status_placeholder.text(f"Extracting Patentable Claims for chunk {i}...")

        claims_extraction_response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[
                {"role": "system", "content": context},
                {"role": "user", "content": f"Extract any patentable claims from the following: \n {chunk}. \n Extract each claim. Briefly explain why you extracted this word phrase. Exclude any additional commentary."}
            ]
        )

        claims_extraction = claims_extraction_response['choices'][0]['message']['content']
        all_extracted_claims += claims_extraction.strip() + "\n\n"

    st.session_state.claims_extraction = all_extracted_claims

    learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")
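
# Note: openai.ChatCompletion is the pre-1.0 openai-python interface. On
# openai>=1.0 the equivalent call is OpenAI().chat.completions.create(...),
# so pin openai<1.0 in requirements if keeping the call above as written.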

st.markdown(
    "<sub>This app was created by [Taylor Ennen](https://github.com/taylor-ennen/GPT-Streamlit-MVP) & [Tonic](https://huggingface.co/tonic)</sub>",
    unsafe_allow_html=True,
)