eagle0504 committed
Commit 451d492 · 1 Parent(s): ecab2ea

files cleaned up

Files changed (2)
  1. app.py +6 -4
  2. helper/utils.py +16 -60
app.py CHANGED
@@ -49,7 +49,9 @@ with st.sidebar:
 
     # Chunk size
     chunk_size_input = st.number_input(
-        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):", value=2, step=1
+        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
+        value=2,
+        step=1,
     )
 
     # Quantization
@@ -68,8 +70,8 @@ with st.sidebar:
 
     # Select FM
     option = st.selectbox(
-        "Which foundational model would you like?",
-        ("GPT4", "LLAMA3"))
+        "Which foundational model would you like?", ("GPT4", "LLAMA3")
+    )
 
     # Clear button
     clear_button = st.sidebar.button("Clear Conversation", key="clear")
@@ -135,7 +137,7 @@ elif uploaded_files:
     result = refs_tab
 
     # Call FM
-    content = ' '.join(list(result.sentences))
+    content = " ".join(list(result.sentences))
     if option == "GPT4":
         response = call_gpt(prompt, content)
     else:
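
All three app.py hunks are formatting-only: Black-style argument wrapping and a switch to double quotes, with no behavioral change. For orientation, a condensed, hypothetical sketch of the flow these hunks touch (the surrounding wiring is assumed from context, not taken from the full file):

# Hypothetical condensed sketch; read_and_textify, call_gpt, and call_llama
# come from helper/utils.py, and the wiring shown here is assumed, not verbatim.
import streamlit as st

with st.sidebar:
    # Chunk size, presumably forwarded to read_and_textify(files, chunk_size=...)
    chunk_size_input = st.number_input(
        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
        value=2,
        step=1,
    )
    # Foundation model selector
    option = st.selectbox("Which foundational model would you like?", ("GPT4", "LLAMA3"))

# Later, top-ranked chunks are flattened into one context string for the model:
# content = " ".join(list(result.sentences))
# response = call_gpt(prompt, content) if option == "GPT4" else call_llama(prompt)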
helper/utils.py CHANGED
@@ -15,44 +15,6 @@ def current_year():
     return now.year
 
 
-# def read_and_textify(
-#     files: List[str],
-# ) -> Tuple[List[str], List[str]]:
-#     """
-#     Reads PDF files and extracts text from each page.
-
-#     This function iterates over a list of uploaded PDF files, extracts text from each page,
-#     and compiles a list of texts and corresponding source information.
-
-#     Args:
-#     files (List[st.uploaded_file_manager.UploadedFile]): A list of uploaded PDF files.
-
-#     Returns:
-#     Tuple[List[str], List[str]]: A tuple containing two lists:
-#         1. A list of strings, where each string is the text extracted from a PDF page.
-#         2. A list of strings indicating the source of each text (file name and page number).
-#     """
-
-#     # Initialize lists to store extracted texts and their sources
-#     text_list = []  # List to store extracted text
-#     sources_list = []  # List to store source information
-
-#     # Iterate over each file
-#     for file in files:
-#         pdfReader = PyPDF2.PdfReader(file)  # Create a PDF reader object
-#         # Iterate over each page in the PDF
-#         for i in range(len(pdfReader.pages)):
-#             pageObj = pdfReader.pages[i]  # Get the page object
-#             text = pageObj.extract_text()  # Extract text from the page
-#             pageObj.clear()  # Clear the page object (optional, for memory management)
-#             text_list.append(text)  # Add extracted text to the list
-#             # Create a source identifier and add it to the list
-#             sources_list.append(file.name + "_page_" + str(i))
-
-#     # Return the lists of texts and sources
-#     return [text_list, sources_list]
-
-
 def read_and_textify(
     files: List[str], chunk_size: int = 2  # Default chunk size set to 50
 ) -> Tuple[List[str], List[str]]:
@@ -85,9 +47,9 @@ def read_and_textify(
             text = pageObj.extract_text()  # Extract text from the page
             if text:
                 # Split text into chunks of approximately 'chunk_size' words
-                words = text.split('. ')
+                words = text.split(". ")
                 for j in range(0, len(words), chunk_size):
-                    chunk = ". ".join(words[j : j + chunk_size]) + '.'
+                    chunk = ". ".join(words[j : j + chunk_size]) + "."
                     text_list.append(chunk)
                     # Create a source identifier for each chunk and add it to the list
                     sources_list.append(f"{file.name}_page_{i}_chunk_{j // chunk_size}")
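
The two edits in this hunk only swap quote styles, but the chunking logic they sit in is the heart of read_and_textify. A minimal, self-contained demo of that split-and-rejoin behavior (hypothetical sample text, no PDF needed):

# Standalone demo of the sentence-chunking logic used in read_and_textify.
text = "First sentence. Second sentence. Third sentence. Fourth sentence. Fifth."
chunk_size = 2  # two sentences per chunk, matching the app's default

words = text.split(". ")  # naive sentence split, exactly as in the diff
chunks = [
    ". ".join(words[j : j + chunk_size]) + "." for j in range(0, len(words), chunk_size)
]
print(chunks)
# ['First sentence. Second sentence.', 'Third sentence. Fourth sentence.', 'Fifth..']

The doubled period on the last chunk is a real quirk of this approach: the final fragment keeps its own trailing dot, because the split is a plain string operation rather than a true sentence tokenizer.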
@@ -134,22 +96,22 @@ def call_gpt(prompt: str, content: str) -> str:
     """
     Sends a structured conversation context including a system prompt, user prompt,
     and additional background content to the GPT-3.5-turbo model for a response.
-
+
     This function is responsible for generating an AI-powered response by interacting
     with the OpenAI API. It puts together a preset system message, a formatted user query,
     and additional background information before requesting the completion from the model.
-
+
     Args:
     prompt (str): The main question or topic that the user wants to address.
     content (str): Additional background information or details relevant to the prompt.
-
+
     Returns:
     str: The generated response from the GPT model based on the given prompts and content.
-
+
     Note: 'openai_client' is assumed to be an already created and authenticated instance of the OpenAI
     openai_client, which should be set up prior to calling this function.
     """
-
+
     # Generates a response from the model based on the interactive messages provided
     response = openai_client.chat.completions.create(
         model="gpt-3.5-turbo",  # The AI model being queried for a response
@@ -162,7 +124,7 @@ def call_gpt(prompt: str, content: str) -> str:
             {"role": "assistant", "content": "What is the background content?"},
             # User providing the background content
             {"role": "user", "content": content},
-        ]
+        ],
     )
 
     # Extracts and returns the response content from the model's completion
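
These call_gpt hunks strip trailing whitespace and add a trailing comma; the request itself is untouched. A hedged usage sketch, assuming openai_client is already created and authenticated (as the docstring notes) and that helper/utils.py is importable as helper.utils:

# Hypothetical usage; the module expects a configured OpenAI client at import time.
from helper.utils import call_gpt

answer = call_gpt(
    prompt="What does the uploaded document cover?",
    content="The document describes a PDF question-answering pipeline.",
)
print(answer)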
@@ -171,28 +133,22 @@ def call_gpt(prompt: str, content: str) -> str:
 
 together_client = Together(api_key=os.environ["TOGETHER_API_KEY"])
 
+
 def call_llama(prompt: str) -> str:
     """
-    Send a prompt to the Llama model and return the response.
-    Args:
-    prompt (str): The input prompt to send to the Llama model.
-    Returns:
-    str: The response from the Llama model.
+    Send a prompt to the Llama model and return the response.
+    Args:
+    prompt (str): The input prompt to send to the Llama model.
+    Returns:
+    str: The response from the Llama model.
     """
 
     # Create a completion request with the prompt
     response = together_client.chat.completions.create(
-
         # Use the Llama-3-8b-chat-hf model
         model="meta-llama/Llama-3-8b-chat-hf",
-
         # Define the prompt as a user message
-        messages=[
-            {
-                "role": "user",
-                "content": prompt  # Use the input prompt
-            }
-        ],
+        messages=[{"role": "user", "content": prompt}],  # Use the input prompt
     )
 
     # Return the content of the first response message
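
Here the messages list is collapsed onto a single line and the docstring lines change only in whitespace; the Together request is otherwise identical. A hedged usage sketch, assuming TOGETHER_API_KEY is set so the module-level Together client can initialize:

# Hypothetical usage; requires TOGETHER_API_KEY in the environment before import.
from helper.utils import call_llama

print(call_llama("Summarize retrieval-augmented generation in one sentence."))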
@@ -321,4 +277,4 @@ def query_search(
     # Sort the DataFrame based on the 'qim' score in descending order
     refs = refs.sort_values(by="qim", ascending=False)
 
-    return refs
+    return refs