Spaces:

awinml
/

2-qa-earnings-sentencewise

Build error

App Files Files Community

awinml committed on Mar 12, 2023

Commit

9975133

•

1 Parent(s): 72a93a0

Upload 2 files

Browse files

Files changed (2) hide show

app.py +73 -37
utils.py +15 -7

app.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import pinecone
 import streamlit as st
 import streamlit_scrollable_textbox as stx
 import openai
 from utils import (
@@ -17,23 +20,32 @@ from utils import (
     format_query,
     sentence_id_combine,
     text_lookup,
-    gpt3,
 )
 st.title("Abstractive Question Answering")
 st.write(
     "The app uses the quarterly earnings call transcripts for 10 companies (Apple, AMD, Amazon, Cisco, Google, Microsoft, Nvidia, ASML, Intel, Micron) for the years 2016 to 2020."
 )
-query_text = st.text_input("Input Query", value="Who is the CEO of Apple?")
-years_choice = ["2020", "2019", "2018", "2017", "2016"]
-year = st.selectbox("Year", years_choice)
-quarter = st.selectbox("Quarter", ["Q1", "Q2", "Q3", "Q4"])
 ticker_choice = [
     "AAPL",
@@ -48,23 +60,33 @@ ticker_choice = [
     "AMD",
 ]
-ticker = st.selectbox("Company", ticker_choice)
-num_results = int(st.number_input("Number of Results to query", 1, 5, value=5))
 # Choose encoder model
 encoder_models_choice = ["SGPT", "MPNET"]
-encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
 # Choose decoder model
-decoder_models_choice = ["FLAN-T5", "T5", "GPT3 - (text-davinci-003)"]
-decoder_model = st.selectbox("Select Decoder Model", decoder_models_choice)
 if encoder_model == "MPNET":
@@ -82,13 +104,15 @@ elif encoder_model == "SGPT":
     retriever_model = get_sgpt_embedding_model()
-window = int(st.number_input("Sentence Window Size", 0, 5, value=3))
-threshold = float(
-    st.number_input(
-        label="Similarity Score Threshold", step=0.05, format="%.2f", value=0.35
     )
-)
 data = get_data()
@@ -109,22 +133,26 @@ else:
     context_list = format_query(query_results)
-st.subheader("Answer:")
 if decoder_model == "GPT3 - (text-davinci-003)":
-    with st.form("my_form"):
-        openai_key = st.text_input(
-            "Enter OpenAI key",
-            value="",
-            type="password",
-        )
-        submitted = st.form_submit_button("Submit")
-        if submitted:
-            api_key = save_key(openai_key)
-            openai.api_key = api_key
-            generated_text = gpt3(query_text, context_list)
-            st.write(generated_text)
 elif decoder_model == "T5":
     t5_pipeline = get_t5_model()
@@ -132,7 +160,9 @@ elif decoder_model == "T5":
     for context_text in context_list:
         output_text.append(t5_pipeline(context_text)[0]["summary_text"])
     generated_text = ". ".join(output_text)
-    st.write(t5_pipeline(generated_text)[0]["summary_text"])
 elif decoder_model == "FLAN-T5":
     flan_t5_pipeline = get_flan_t5_model()
@@ -140,13 +170,19 @@ elif decoder_model == "FLAN-T5":
     for context_text in context_list:
         output_text.append(flan_t5_pipeline(context_text)[0]["summary_text"])
     generated_text = ". ".join(output_text)
-    st.write(flan_t5_pipeline(generated_text)[0]["summary_text"])
-with st.expander("See Retrieved Text"):
-    for context_text in context_list:
-        st.markdown(f"- {context_text}")
 file_text = retrieve_transcript(data, year, quarter, ticker)
-with st.expander("See Transcript"):
-    stx.scrollableTextbox(file_text, height=700, border=False, fontFamily="Helvetica")

 import pinecone
 import streamlit as st
+st.set_page_config(layout="wide")
 import streamlit_scrollable_textbox as stx
 import openai
 from utils import (
     format_query,
     sentence_id_combine,
     text_lookup,
+    generate_prompt,
+    gpt_model,
 )
 st.title("Abstractive Question Answering")
 st.write(
     "The app uses the quarterly earnings call transcripts for 10 companies (Apple, AMD, Amazon, Cisco, Google, Microsoft, Nvidia, ASML, Intel, Micron) for the years 2016 to 2020."
 )
+col1, col2 = st.columns([3, 3], gap="medium")
+with col1:
+    st.subheader("Question")
+    query_text = st.text_input("Input Query", value="Who is the CEO of Apple?")
+with col1:
+    years_choice = ["2020", "2019", "2018", "2017", "2016"]
+with col1:
+    year = st.selectbox("Year", years_choice)
+with col1:
+    quarter = st.selectbox("Quarter", ["Q1", "Q2", "Q3", "Q4"])
 ticker_choice = [
     "AAPL",
     "AMD",
 ]
+with col1:
+    ticker = st.selectbox("Company", ticker_choice)
+with st.sidebar:
+    st.subheader("Select Options:")
+with st.sidebar:
+    num_results = int(st.number_input("Number of Results to query", 1, 5, value=5))
 # Choose encoder model
 encoder_models_choice = ["SGPT", "MPNET"]
+with st.sidebar:
+    encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
 # Choose decoder model
+decoder_models_choice = [
+    "GPT3 - (text-davinci-003)",
+    "T5",
+    "FLAN-T5",
+]
+with st.sidebar:
+    decoder_model = st.selectbox("Select Decoder Model", decoder_models_choice)
 if encoder_model == "MPNET":
     retriever_model = get_sgpt_embedding_model()
+with st.sidebar:
+    window = int(st.number_input("Sentence Window Size", 0, 5, value=3))
+with st.sidebar:
+    threshold = float(
+        st.number_input(
+            label="Similarity Score Threshold", step=0.05, format="%.2f", value=0.35
+        )
     )
 data = get_data()
     context_list = format_query(query_results)
+prompt = generate_prompt(query_text, context_list)
 if decoder_model == "GPT3 - (text-davinci-003)":
+    with col2:
+        with st.form("my_form"):
+            edited_prompt = st.text_area(label="Model Prompt", value=prompt, height=270)
+            openai_key = st.text_input(
+                "Enter OpenAI key",
+                value="",
+                type="password",
+            )
+            submitted = st.form_submit_button("Submit")
+            if submitted:
+                api_key = save_key(openai_key)
+                openai.api_key = api_key
+                generated_text = gpt_model(edited_prompt)
+                with col2:
+                    st.subheader("Answer:")
+                    st.write(generated_text)
 elif decoder_model == "T5":
     t5_pipeline = get_t5_model()
     for context_text in context_list:
         output_text.append(t5_pipeline(context_text)[0]["summary_text"])
     generated_text = ". ".join(output_text)
+    with col2:
+        st.subheader("Answer:")
+        st.write(t5_pipeline(generated_text)[0]["summary_text"])
 elif decoder_model == "FLAN-T5":
     flan_t5_pipeline = get_flan_t5_model()
     for context_text in context_list:
         output_text.append(flan_t5_pipeline(context_text)[0]["summary_text"])
     generated_text = ". ".join(output_text)
+    with col2:
+        st.subheader("Answer:")
+        st.write(flan_t5_pipeline(generated_text)[0]["summary_text"])
+with col1:
+    with st.expander("See Retrieved Text"):
+        for context_text in context_list:
+            st.markdown(f"- {context_text}")
 file_text = retrieve_transcript(data, year, quarter, ticker)
+with col1:
+    with st.expander("See Transcript"):
+        stx.scrollableTextbox(
+            file_text, height=700, border=False, fontFamily="Helvetica"
+        )

utils.py CHANGED Viewed

@@ -113,15 +113,23 @@ def text_lookup(data, sentence_ids):
     return context
-def gpt3(query, result):
     response = openai.Completion.create(
         model="text-davinci-003",
-        prompt=f"""Context information is below. \n"
-    "---------------------\n"
-    "{result}"
-    "\n---------------------\n"
-    "Given the context information and prior knowledge, answer this question: {query}. \n"
-    "Try to include as many key details as possible and format the answer in points. \n" """,
         temperature=0.1,
         max_tokens=512,
         top_p=1.0,

     return context
+def generate_prompt(query_text, context_list):
+    #context = " ".join(context_list)
+    prompt = f"""
+    Context information is below:
+    ---------------------
+    {context_list}
+    ---------------------
+    Given the context information and prior knowledge, answer this question:
+    {query_text}
+    Try to include as many key details as possible and format the answer in points."""
+    return prompt
+def gpt_model(prompt):
     response = openai.Completion.create(
         model="text-davinci-003",
+        prompt=prompt,
         temperature=0.1,
         max_tokens=512,
         top_p=1.0,