File size: 4,826 Bytes
042d924 798b27c d8d1045 56541bd 798b27c 9f4d7d7 56541bd dd51e9f 56541bd a2fe734 042d924 798b27c 042d924 a2fe734 042d924 a2fe734 798b27c a2fe734 d8d1045 a2fe734 d8d1045 a2fe734 56541bd a2fe734 1b8ed56 56541bd dd51e9f 56541bd dd51e9f 56541bd a2fe734 9f4d7d7 a2fe734 9f4d7d7 a2fe734 9f4d7d7 1b8ed56 a2fe734 571871b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import streamlit as st
import sounddevice as sd
import numpy as np
import wave
import whisper
import os
import streamlit.components.v1 as components
import tempfile
import io
import requests
import json
import openai
# Define a function to split text into chunks
def chunk_text(text, chunk_size=2000):
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each piece; must be at least 1.

    Returns:
        A list of substrings that concatenate back to *text*. The final
        piece may be shorter than *chunk_size*; empty input yields ``[]``.

    Raises:
        ValueError: If *chunk_size* is less than 1. A non-positive size can
            never advance through the text, so it is rejected up front
            instead of looping forever.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
# Create the session-state entry the first time the script runs for a session.
if "learning_objectives" not in st.session_state:
    st.session_state["learning_objectives"] = ""

# Page heading.
st.title("Patent Claims Extraction")

# OpenAI credential, entered through a password-type text field.
api_key = st.text_input(label="Enter your OpenAI API Key:", type="password")
# Audio Recording
# Capture settings shared by the input stream and the WAV header, so the file
# written below always describes the samples it actually contains.
RECORDING_SAMPLE_RATE = 44100
RECORDING_CHANNELS = 1
RECORDING_SAMPLE_WIDTH = 2  # bytes per sample; matches the int16 stream dtype

record_audio = st.checkbox("Record Audio")
if record_audio:
    audio_frames = []

    def audio_callback(indata, frames, time, status):
        """Collect one buffer of captured audio from the input stream.

        Args:
            indata: NumPy array holding the newly captured samples.
            frames: Number of frames in ``indata`` (unused).
            time: Timing information passed by the stream (unused).
            status: Stream status flags; printed when set.
        """
        if status:
            print(status, flush=True)
        # ndarray.any() works for any channel count; the built-in any()
        # raises ValueError when a row holds more than one sample.
        if indata.any():
            # Copy because the stream hands over a buffer it may reuse.
            audio_frames.append(indata.copy())

    # NOTE(review): sd.stop() is documented to stop sd.play()/sd.rec(), not
    # streams created with sd.InputStream -- confirm the intended stop flow.
    if st.button("Stop Recording"):  # Moved this button here to stop audio recording
        sd.stop()

    # NOTE(review): the stream is only open while this `with` body runs, which
    # is a single st.text call, so very little audio can be captured here.
    with st.spinner("Recording..."):
        with sd.InputStream(
            samplerate=RECORDING_SAMPLE_RATE,
            channels=RECORDING_CHANNELS,
            dtype="int16",
            callback=audio_callback,
        ):
            st.text("Recording audio. Click 'Stop Recording' when finished.")
    st.success("Recording stopped")

    if audio_frames:
        audio_data = np.concatenate(audio_frames, axis=0)
        with wave.open("recorded_audio.wav", "wb") as wf:
            wf.setnchannels(RECORDING_CHANNELS)
            wf.setsampwidth(RECORDING_SAMPLE_WIDTH)
            wf.setframerate(RECORDING_SAMPLE_RATE)
            wf.writeframes(audio_data.tobytes())
# Moved the submit_button check here
# NOTE(review): nothing in the visible code stores 'submit_button' in session
# state, so this branch may never run -- confirm which widget should trigger it.
if 'submit_button' in st.session_state:
    # The recording step saves its capture to this file; the previous code
    # read from an undefined name (`audio`) and raised NameError instead.
    recorded_audio_path = "recorded_audio.wav"
    if not os.path.exists(recorded_audio_path):
        st.warning("No recorded audio found. Record audio before transcribing.")
    else:
        model = whisper.load_model("base")
        st.audio(recorded_audio_path, format="audio/wav")
        st.info("Transcribing...")
        result = model.transcribe(recorded_audio_path)
        # Report completion only after the transcription has actually finished.
        st.success("Transcription complete")
        transcript = result['text']
        with st.expander("See transcript"):
            st.markdown(transcript)
# Chat models offered in the dropdown, in display order.
model_options = [
    "gpt-3.5-turbo-0301",
    "gpt-3.5-turbo-0613",
    "gpt-3.5-turbo",
    "gpt-4-0314",
    "gpt-4-0613",
    "gpt-4",
]
model_choice = st.selectbox("Select the model you want to use:", model_options)

# Role description for the assistant.
# NOTE(review): not referenced by any other visible line -- confirm whether it
# was meant to be sent along with the request.
context = (
    "You are a patent claims identifier and extractor. "
    "You will freeform text, identify any claims contained therein that may be patentable. "
    "You identify, extract, print such claims, briefly explain why each claim is patentable."
)

# Free-form text to analyse, pre-filled with a placeholder value.
userinput = st.text_input("Input Text:", value="Freeform text here!")
# Initialize OpenAI API
if api_key:
    openai.api_key = api_key

# Learning Objectives
st.write("### Patentable Claims:")

# Most recent model reply; stays empty until an extraction has run.
claims_extraction = ""
# Placeholder reused for per-chunk progress messages and then the final result.
learning_status_placeholder = st.empty()
disable_button_bool = False

# The button's widget key must differ from the session-state entry assigned
# below: Streamlit raises if a key owned by an instantiated widget is modified.
if userinput and api_key and st.button("Extract Claims", key="extract_claims_button", disabled=disable_button_bool):
    # Split the user input into chunks
    input_chunks = chunk_text(userinput)
    # Per-chunk results, kept separate so they can be joined with clear breaks.
    extracted_parts = []
    # enumerate gives the true position even when two chunks have equal text.
    for chunk_number, chunk in enumerate(input_chunks, start=1):
        learning_status_placeholder.text(f"Extracting Patentable Claims for chunk {chunk_number}...")
        # One chat-completion request per chunk.
        claims_extraction_response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[
                {"role": "user", "content": f"Extract any patentable claims from the following: \n {chunk}. \n Extract each claim. Briefly explain why you extracted this word phrase. Exclude any additional commentary."}
            ]
        )
        # Pull the reply text out of the first returned choice.
        claims_extraction = claims_extraction_response['choices'][0]['message']['content']
        extracted_parts.append(claims_extraction.strip())
    # Blank line between chunks so one chunk's claims do not run into the next.
    all_extracted_claims = "\n\n".join(extracted_parts)
    # Keep the combined result in session state.
    st.session_state.claims_extraction = all_extracted_claims
    # Replace the progress message with the combined result.
    learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")

# Citation
st.markdown("<sub>This app was created by [Taylor Ennen](https://github.com/taylor-ennen/GPT-Streamlit-MVP) & [Tonic](https://huggingface.co/tonic)</sub>", unsafe_allow_html=True)
|