vaugheu committed on
Commit
beb0e8e
·
verified ·
1 Parent(s): bb0a195

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +310 -0
app.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ import google.generativeai as genai
6
+ import os
7
+ from io import BytesIO, TextIOWrapper
8
+ import PyPDF2
9
+ import docx2txt
10
+ import csv
11
+ from huggingface_hub import InferenceClient
12
+
13
# Page header shown above every control.
st.title('👀 AI Playground ')

st.text('Web Scraping with Pandas and Streamlit, Gemini, Mistral, and Phi-3')

# Backend choice; the rest of the script branches on this value to decide
# which credentials to ask for and which `gai` implementation to define.
Model = st.selectbox("Select your prefered model:", ["GEMINI", "MISTRAL8X", "PHI-3", "Custom Models"])
18
+
19
# Both branches define `tkey` (the credential typed by the user) and a
# `gai(text) -> str` function that the "Generate" handler calls.
if Model == "GEMINI":
    tkey = st.text_input("Your Token or API key here:", "")

    # Sampling settings handed to the Gemini model.
    generation_config = {
        "temperature": 0.9,
        "top_p": 1,
        "top_k": 1,
        "max_output_tokens": 2048,
    }

    # Every listed harm category uses the same blocking threshold.
    safety_settings = [
        {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
        for category in (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
    ]

    model = genai.GenerativeModel(
        model_name="gemini-pro",
        generation_config=generation_config,
        safety_settings=safety_settings,
    )

    genai.configure(api_key=tkey)

    def gai(inp):
        """Send ``inp`` to the Gemini model and return the response text."""
        return model.generate_content(inp).text

else:
    tkey = st.text_input("HuggingFace token here:", "")

    # `mkey` is the Hugging Face model id used by `generate` below.
    if Model == "MISTRAL8X":
        mkey = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    elif Model == "PHI-3":
        mkey = "microsoft/Phi-3-mini-4k-instruct"
    else:
        mkey = st.text_input("Your HuggingFace Model String here:", "")

    def format_prompt(message, history):
        """Build an ``[INST] ... [/INST]`` prompt from past turns plus ``message``.

        ``history`` is an iterable of ``(user_prompt, bot_response)`` pairs;
        each pair is rendered as ``[INST] user [/INST] bot `` and the new
        ``message`` is appended as a final, unanswered instruction.
        """
        # NOTE(review): this markup is applied to every model selectable in
        # this branch, not only Mixtral -- confirm it suits the others.
        turns = [
            f"[INST] {user_prompt} [/INST] {bot_response} "
            for user_prompt, bot_response in history
        ]
        return "".join(turns) + f"[INST] {message} [/INST]"

    def generate(prompt, history=None, temperature=0.9, max_new_tokens=1024, top_p=0.95, repetition_penalty=1.0):
        """Generate a completion for ``prompt`` through the Hugging Face client.

        This is a generator that yields exactly once: the complete generated
        text, after the whole token stream has been consumed. The same text
        is also the generator's return value.

        Args:
            prompt: The new user message.
            history: Optional ``(user_prompt, bot_response)`` pairs that
                precede ``prompt``. ``None`` (the default) means no history.
            temperature: Sampling temperature; values below 0.01 are raised
                to 0.01.
            max_new_tokens: Forwarded to ``text_generation`` unchanged.
            top_p: Nucleus-sampling value, converted to ``float``.
            repetition_penalty: Forwarded to ``text_generation`` unchanged.
        """
        # A ``None`` default avoids sharing one list object across calls.
        history = [] if history is None else history

        generate_kwargs = dict(
            temperature=max(float(temperature), 1e-2),
            max_new_tokens=max_new_tokens,
            top_p=float(top_p),
            repetition_penalty=repetition_penalty,
            do_sample=True,
            seed=42,
        )

        formatted_prompt = format_prompt(prompt, history)

        client = InferenceClient(model=mkey, token=tkey)
        stream = client.text_generation(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
            details=True,
            return_full_text=False,
        )

        # Collect the streamed tokens, then drop sentence-boundary markers.
        output = "".join(response.token.text for response in stream)
        output = output.replace("<s>", "").replace("</s>", "")

        yield output
        return output

    def gai(query):
        """Return everything ``generate`` yields for ``query`` as one string."""
        return "".join(generate(query))
137
+
138
+ ################################################################################################################
139
+
140
+
141
# Full-page background: a <style> rule injected into the Streamlit app
# container. `unsafe_allow_html` is needed for the raw markup to be rendered.
page_bg_img = """
<style>
[data-testid="stAppViewContainer"] {
background-image: url(
https://cdn.wallpapersafari.com/41/41/vIdSZT.jpg
);
background-size: cover;
}
</style>
"""
st.markdown(page_bg_img, unsafe_allow_html=True)

# Free-text prompt; the "Generate" handler later extends it with optional
# website, preset and file content.
inp = st.text_input("Enter a prompt and let AI craft stories, poems, code, and more.", "")
155
+
156
+ # Function to scrape data
157
def scrape_data(url, timeout=30):
    """Fetch ``url`` and return its text, links and image sources as a table.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unresponsive host would block the app indefinitely.

    Returns:
        A ``pandas.DataFrame`` with three columns:
        ``Text`` (the ``.text`` of every ``<p>``, ``<a>`` and ``<img>``),
        ``Links`` (every non-empty ``href`` of an ``<a>``) and
        ``Images`` (every non-empty ``src`` of an ``<img>``).
        The columns are independent lists; shorter ones are padded with
        ``None`` so that all three have the same length.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    columns = {
        'Text': [element.text for element in soup.find_all(['p', 'a', 'img'])],
        'Links': [
            element.get('href')
            for element in soup.find_all('a')
            if element.get('href')
        ],
        'Images': [
            element.get('src')
            for element in soup.find_all('img')
            if element.get('src')
        ],
    }

    # A DataFrame built from a dict of lists needs equal-length columns.
    longest = max(len(values) for values in columns.values())
    for values in columns.values():
        values.extend([None] * (longest - len(values)))

    return pd.DataFrame(columns)
180
+
181
+ # Function to extract text from a PDF file
182
def extract_text_from_pdf(file_bytes):
    """Return the text of a PDF given as raw bytes, flattened to one line.

    The text of all pages is concatenated in page order, then every tab and
    newline is replaced by a single space.
    """
    pdf_reader = PyPDF2.PdfReader(BytesIO(file_bytes))

    # `or ""` keeps the join safe should a page yield no text object
    # (defensive; whether that can happen depends on the PyPDF2 version).
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    return text.replace('\t', ' ').replace('\n', ' ')
192
+
193
+ # Function to extract text from a TXT file
194
def extract_text_from_txt(file_bytes):
    """Decode an uploaded text file to ``str``.

    The bytes are read as UTF-8. A leading byte-order mark is dropped, and
    bytes that are not valid UTF-8 become U+FFFD instead of raising, so a
    file saved in another encoding degrades gracefully rather than crashing
    the app.
    """
    return file_bytes.decode('utf-8-sig', errors='replace')
197
+
198
+ # Function to extract text from a DOCX file
199
def extract_text_from_docx(file_bytes):
    """Return the text of a DOCX file given as raw bytes, flattened to one line.

    The document is handed to ``docx2txt`` through an in-memory buffer; tabs
    and newlines in the extracted text are each replaced by a space.
    """
    raw_text = docx2txt.process(BytesIO(file_bytes))
    without_tabs = raw_text.replace('\t', ' ')
    return without_tabs.replace('\n', ' ')
202
+
203
def extract_text_from_csv(file_bytes, encoding='utf-8'):
    """Flatten a CSV file given as raw bytes into one line of text.

    Args:
        file_bytes: The raw file content.
        encoding: Text encoding of ``file_bytes``.

    Returns:
        The cells of each row joined by single spaces, with one space after
        every row (so non-empty input ends with a space). Tabs and newlines
        inside cells are replaced by spaces.
    """
    # Decode straight from the byte buffer; there is no need to decode the
    # whole payload and encode it again just to feed the reader.
    csv_reader = csv.reader(TextIOWrapper(BytesIO(file_bytes), encoding=encoding))

    text = ''.join(' '.join(row) + ' ' for row in csv_reader)

    return text.replace('\t', ' ').replace('\n', ' ')
216
+
217
+
218
+
219
# Optional website source: `url` stays "" unless the box is ticked.
url_input = st.checkbox("Use website input")
url = ""
if url_input:
    # Input for the website URL
    url = st.text_input('Enter the website URL (optional): ', '')

file_input = st.checkbox("Use file input")
uploaded_file = None

# Optional preset instruction appended to the prompt.
sp_prompt = ""
prompt_input = st.checkbox("Use special prompt input")
if prompt_input:
    sp_prompt = st.selectbox("Special Prompt (Optional):", [
        "Prompt A: Explain the following with proper details.",
        "Prompt B: Describe the whole thing in a nutshell.",
        "Prompt C: How this can be useful for us?"
    ])

if file_input:
    # Add file uploader
    st.write("Upload a PDF, TXT, or DOCX file to extract the text.")
    uploaded_file = st.file_uploader("Choose a file")

    if uploaded_file:
        # Compare the extension case-insensitively so "REPORT.PDF" works too.
        file_name, file_extension = os.path.splitext(uploaded_file.name)
        file_extension = file_extension.lower()

        extractors = {
            ".pdf": extract_text_from_pdf,
            ".txt": extract_text_from_txt,
            ".docx": extract_text_from_docx,
            ".csv": extract_text_from_csv,
        }
        extractor = extractors.get(file_extension)

        if extractor is None:
            st.error("Unsupported file type.")
            # Later code appends `uploaded_file` to the prompt as text, so it
            # must never be left holding the raw upload object.
            uploaded_file = None
        else:
            # From here on `uploaded_file` holds the extracted text (a str).
            uploaded_file = extractor(uploaded_file.getvalue())
259
+
260
def _build_prompt(prompt, page_url, special_prompt, file_text):
    """Combine the typed prompt with the optional website, preset and file text.

    Args:
        prompt: Text typed by the user.
        page_url: Website to scrape, or "" for none. ``https://`` is
            prepended when the value has no http/https scheme.
        special_prompt: Preset instruction to append, or "" for none.
        file_text: Text extracted from an upload, or a falsy value for none.

    Returns:
        The final prompt string; empty when nothing was provided.

    Errors raised by ``scrape_data`` while fetching the page propagate to
    the caller.
    """
    if page_url:
        # Test the start of the string only: an "https://" appearing later
        # (e.g. in a query parameter) must not suppress the prefix, and an
        # explicit "http://" must not get "https://" glued in front of it.
        if not page_url.lower().startswith(('http://', 'https://')):
            page_url = 'https://' + page_url
        scraped_data = scrape_data(page_url)
        paragraph = ' '.join(scraped_data['Text'].dropna())
        prompt = paragraph + ' ' + "Take the given data above, as information and generate a response based on this prompt: " + prompt

    if special_prompt:
        prompt = prompt + " " + special_prompt

    # Only append real text; anything else cannot be concatenated to a str.
    if isinstance(file_text, str) and file_text:
        prompt = prompt + " " + file_text

    return prompt


output = ''
previous_responses = []
if st.button("Generate"):
    if tkey == '':
        # Without credentials the model call cannot succeed, so stop here.
        st.error("Need to input Token or API key.")
    else:
        try:
            inp = _build_prompt(inp, url, sp_prompt, uploaded_file)
        except requests.RequestException as error:
            st.error(f"Could not read the website: {error}")
        else:
            if inp:
                output = gai(inp)
                st.write(output)

                # Offer a download only when there is something to save.
                if output:
                    ofType = 'txt'
                    st.download_button("Download File", data=output, file_name=f"Generated Answer.{ofType}")
            else:
                st.error("Please enter a prompt to generate text.")
307
+
308
+ #st.subheader("[🔗...Visit my GitHub Profile...🔗](https://github.com/NafisRayan)")
309
+
310
+ # streamlit run app.py