KushwanthK committed on
Commit
1f6076c
·
verified ·
1 Parent(s): 088c08e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -781
app.py CHANGED
@@ -1,823 +1,87 @@
1
- from streamlit import session_state as ss
2
- from streamlit_pdf_viewer import pdf_viewer
3
- import streamlit_pdf_viewer
4
-
5
- import streamlit as st
6
-
7
-
8
- # # Declare variable.
9
- # if 'pdf_ref' not in ss:
10
- # ss.pdf_ref = None
11
-
12
-
13
- # # Access the uploaded ref via a key.
14
- # st.file_uploader("Upload PDF file", type=('pdf'), key='pdf')
15
-
16
- # if ss.pdf:
17
- # ss.pdf_ref = ss.pdf # backup
18
-
19
- # # Now you can access "pdf_ref" anywhere in your app.
20
- # if ss.pdf_ref:
21
- # binary_data = ss.pdf_ref.getvalue()
22
- # pdf_viewer(input=binary_data, width=700)
23
-
24
- # import base64
25
-
26
- # def displayPDF(file):
27
- # # Opening file from file path
28
- # with open(file, "rb") as f:
29
- # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
30
-
31
- # # Embedding PDF in HTML
32
- # pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf"></iframe>'
33
-
34
- # # Displaying File
35
- # st.markdown(pdf_display, unsafe_allow_html=True)
36
-
37
- # displayPDF("../Transformers/Bhagavad-Gita-As-It-Is.pdf")
38
-
39
-
40
- # import streamlit as st
41
- # import streamlit_pdf_viewer
42
-
43
- # def displayPDF(file):
44
- # with open(file, "rb") as f:
45
- # pdf_bytes = f.read()
46
-
47
- # streamlit_pdf_viewer.pdf_viewer(pdf_bytes)
48
-
49
- # displayPDF("../Transformers/Bhagavad-Gita-As-It-Is.pdf")
50
- # Arial Unicode.ttf
51
-
52
- # import streamlit as st
53
- # import fitz # PyMuPDF library
54
- # from PIL import Image, ImageDraw, ImageFont
55
- # import io
56
- # import numpy as np
57
-
58
- # def display_pdf_with_highlight(file_path, keywords):
59
- # # Open the PDF file
60
- # with fitz.open(file_path) as doc:
61
- # # Create a new PDF file to hold the highlighted pages
62
- # highlighted_pdf = fitz.open()
63
-
64
- # # Iterate over each page in the PDF
65
- # for page_index in range(len(doc)):
66
- # page = doc.load_page(page_index)
67
- # pix = page.get_pixmap(dpi=300)
68
- # img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
69
-
70
- # # Create a drawing object and highlight the keywords
71
- # draw = ImageDraw.Draw(img)
72
- # font = ImageFont.truetype("Arial Unicode.ttf", 14) # Replace with your desired font
73
- # for keyword in keywords:
74
- # areas = page.search_for(keyword)
75
- # for area in areas:
76
- # bbox = area.bbox
77
- # draw.rectangle(bbox, outline="yellow", width=3)
78
-
79
- # # Convert the highlighted image to a NumPy array
80
- # img_np = np.asarray(img)
81
-
82
- # # Create a MuPDF-compatible Pixmap from the NumPy array
83
- # muimg = fitz.Pixmap(fitz.csRGB, img_np.shape[1], img_np.shape[0])
84
- # muimg.set_data(img_np.tobytes())
85
-
86
- # # Create a new PDF page and insert the highlighted image
87
- # new_page = highlighted_pdf.new_page(-1, width=muimg.width, height=muimg.height)
88
- # new_page.insert_image(fitz.Rect(0, 0, muimg.width, muimg.height), stream=muimg)
89
-
90
- # # Create a BytesIO object to hold the highlighted PDF data
91
- # pdf_bytes = io.BytesIO()
92
- # highlighted_pdf.write(pdf_bytes)
93
- # pdf_bytes.seek(0)
94
-
95
- # # Display the highlighted PDF in Streamlit
96
- # st.download_button(
97
- # label="Download Highlighted PDF",
98
- # data=pdf_bytes.getvalue(),
99
- # file_name="highlighted_pdf.pdf",
100
- # mime="application/pdf",
101
- # )
102
-
103
- # # Example usage
104
- # file_path = "../Transformers/Bhagavad-Gita-As-It-Is.pdf"
105
- # keywords = ["Arjuna", "Krishna"]
106
- # display_pdf_with_highlight(file_path, keywords)
107
-
108
- # import pyperclip
109
-
110
- # content = str(pyperclip.paste())
111
-
112
- # import streamlit as st
113
- # import fitz
114
-
115
- # def annotate_pdf(file_path, text_to_highlight):
116
- # # Open the PDF file
117
- # with fitz.open(file_path) as doc:
118
- # # Create a new PDF file to hold the annotated pages
119
- # annotated_pdf = fitz.open()
120
-
121
- # # Iterate over each page in the PDF
122
- # for page_index in range(len(doc)):
123
- # page = doc.load_page(page_index)
124
-
125
- # # Search for the text to highlight
126
- # areas = page.search_for(text_to_highlight)
127
-
128
- # # Add rectangle annotations for the highlighted areas
129
- # for area in areas:
130
- # page.add_rect_annot(area)
131
-
132
- # # Create a new PDF page and insert the annotated page
133
- # new_page = annotated_pdf.new_page(-1, width=page.rect.width, height=page.rect.height)
134
- # new_page.show_pdf_page(page.rect, doc, page_index)
135
-
136
- # # Create a BytesIO object to hold the annotated PDF data
137
- # pdf_bytes = annotated_pdf.write()
138
-
139
- # # Display the annotated PDF in Streamlit
140
- # st.download_button(
141
- # label="Download Annotated PDF",
142
- # data=pdf_bytes,
143
- # file_name="annotated_pdf.pdf",
144
- # mime="application/pdf",
145
- # )
146
-
147
- # # Example usage
148
- # file_path = "../Transformers/Bhagavad-Gita-As-It-Is.pdf"
149
- # text_to_highlight = "Arjuna"
150
- # annotate_pdf(file_path, text_to_highlight)
151
-
152
- # def displayPDF(file):
153
- # # Opening file from file path
154
- # with open(file, "rb") as f:
155
- # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
156
-
157
- # # Embedding PDF in HTML
158
- # pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf"></iframe>'
159
-
160
- # # Displaying File
161
- # st.markdown(pdf_display, unsafe_allow_html=True)
162
-
163
- # displayPDF("../Transformers/Bhagavad-Gita-As-It-Is.pdf")
164
-
165
- # import streamlit as st
166
-
167
- # import fitz
168
-
169
- # def annotate_pdf(file_path, text_to_highlight):
170
- # # Open the PDF file
171
- # with fitz.open(file_path) as doc:
172
- # # Create a new PDF file to hold the annotated pages
173
- # annotated_pdf = fitz.open()
174
-
175
- # # Iterate over each page in the PDF
176
- # for page_index in range(len(doc)):
177
- # page = doc.load_page(page_index)
178
-
179
- # # Search for the text to highlight
180
- # areas = page.search_for(text_to_highlight)
181
-
182
- # # Add rectangle annotations for the highlighted areas
183
- # for area in areas:
184
- # page.add_rect_annot(area)
185
-
186
- # # Create a new PDF page and insert the annotated page
187
- # new_page = annotated_pdf.new_page(-1, width=page.rect.width, height=page.rect.height)
188
- # new_page.show_pdf_page(page.rect, doc, page_index)
189
-
190
- # # Create a BytesIO object to hold the annotated PDF data
191
- # pdf_bytes = annotated_pdf.write()
192
-
193
- # streamlit_pdf_viewer.pdf_viewer(pdf_bytes)
194
-
195
- # # Display the annotated PDF in Streamlit
196
- # st.download_button(
197
- # label="Download Annotated PDF",
198
- # data=pdf_bytes,
199
- # file_name="annotated_pdf.pdf",
200
- # mime="application/pdf",
201
- # )
202
-
203
- # # Example usage
204
- # file_path = "../Transformers/Bhagavad-Gita-As-It-Is.pdf"
205
- # text_to_highlight = "Krishna"
206
- # annotate_pdf(file_path, text_to_highlight)
207
-
208
-
209
- # import streamlit as st
210
- # import fitz
211
- # import io
212
-
213
- # def annotate_pdf(uploaded_file, text_to_highlight):
214
- # try:
215
- # # Open the PDF file from the file-like object
216
- # doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
217
-
218
- # # Create a new PDF file to hold the annotated pages
219
- # annotated_pdf = fitz.open()
220
-
221
- # # Iterate over each page in the PDF
222
- # for page_index in range(len(doc)):
223
- # page = doc.load_page(page_index)
224
-
225
- # # Search for the text to highlight
226
- # areas = page.search_for(text_to_highlight)
227
-
228
- # # Add rectangle annotations for the highlighted areas
229
- # for area in areas:
230
- # page.add_rect_annot(area)
231
-
232
- # # Create a new PDF page and insert the annotated page
233
- # new_page = annotated_pdf.new_page(-1, width=page.rect.width, height=page.rect.height)
234
- # new_page.show_pdf_page(page.rect, doc, page_index)
235
-
236
- # # Create a BytesIO object to hold the annotated PDF data
237
- # pdf_bytes = io.BytesIO(annotated_pdf.write())
238
-
239
- # # Display the annotated PDF in Streamlit
240
- # st.download_button(
241
- # label="Download Annotated PDF",
242
- # data=pdf_bytes.getvalue(),
243
- # file_name="annotated_pdf.pdf",
244
- # mime="application/pdf",
245
- # )
246
- # except Exception as e:
247
- # st.error(f"An error occurred: {str(e)}")
248
-
249
- # # Streamlit app
250
- # def main():
251
- # st.title("PDF Annotation App")
252
- # uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
253
- # if uploaded_file is not None:
254
- # text_to_highlight = st.text_input("Enter text to highlight")
255
- # if text_to_highlight:
256
- # annotate_pdf(uploaded_file, text_to_highlight)
257
-
258
- # if __name__ == "__main__":
259
- # main()
260
-
261
-
262
- # file_path = "../Transformers/Bhagavad-Gita-As-It-Is.pdf"
263
- # text_to_highlight = "Krishna"
264
- # annotate_pdf(file_path, text_to_highlight)
265
-
266
- # import fitz
267
-
268
- # import base64
269
-
270
- # def displayPDF(file):
271
-
272
- # # Open the PDF document
273
- # doc = fitz.open("my_pdf.pdf")
274
-
275
- # # Get the first page of the document
276
- # page = doc.loadPage(4)
277
-
278
- # # Search for the text string to highlight
279
- # text_to_highlight = "Supreme Personality of Godhead"
280
-
281
- # # Create a rectangle around the text to highlight
282
- # highlight_rect = fitz.Rect(page.searchFor(text_to_highlight)[0])
283
-
284
- # # Create a highlight annotation
285
- # highlight_annot = fitz.Annot(page, highlight_rect, "Highlight", {"color": fitz.utils.getColor("yellow")})
286
-
287
- # # Add the annotation to the page
288
- # page.addAnnot(highlight_annot)
289
-
290
- # # Save the document
291
- # doc.save("my_pdf_highlighted.pdf")
292
-
293
- # # Opening file from file path
294
- # with open(file, "rb") as f:
295
- # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
296
-
297
- # # Embedding PDF in HTML
298
- # pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf"></iframe>'
299
-
300
- # # Displaying File
301
- # st.markdown(pdf_display, unsafe_allow_html=True)
302
-
303
- # displayPDF("../Transformers/Bhagavad-Gita-As-It-Is.pdf")
304
-
305
-
306
- import streamlit as st
307
- # import fitz
308
- import tempfile
309
-
310
- # Import the PDF_ANNOT_HIGHLIGHT constant
311
- # from fitz.PDF_ANNOT import PDF_ANNOT_HIGHLIGHT
312
-
313
- import base64
314
- import io
315
-
316
-
317
- def display_highlighted_pdf(file_path, text_to_highlight):
318
- # Open the PDF document
319
- doc = pymupdf.open(file_path)
320
-
321
- # Iterate over each page in the PDF
322
- for page_index in range(len(doc)):
323
- page = doc.load_page(page_index)
324
-
325
- # Search for the text string to highlight
326
- areas = page.search_for(text_to_highlight)
327
-
328
- # Create a highlight annotation for each area
329
- for area in areas:
330
- highlight_rect = pymupdf.Rect(area)
331
- highlight_annot = page.add_highlight_annot(highlight_rect) #fitz.Annot(page.parent, highlight_rect, annot_type=fitz.PDF_ANNOT_HIGHLIGHT)
332
- highlight_annot.set_colors({"stroke": pymupdf.utils.getColor("yellow")})
333
- highlight_annot.update()
334
- # page.add_annot(highlight_annot)
335
-
336
- # Create a BytesIO object to hold the highlighted PDF data
337
- # Create a temporary file to save the PDF
338
- with tempfile.NamedTemporaryFile(delete=False) as temp_file:
339
- temp_file_path = temp_file.name
340
- doc.save(temp_file_path)
341
-
342
- # Read the content of the temporary file into a BytesIO object
343
- with open(temp_file_path, "rb") as f:
344
- pdf_bytes = io.BytesIO(f.read())
345
-
346
- # # Remove the temporary file
347
- # st.unlink(temp_file_path)
348
-
349
- # pdf_bytes = io.BytesIO()
350
- # doc.write(pdf_bytes)
351
- # pdf_bytes.seek(0)
352
-
353
- # Encode the PDF data as base64
354
- base64_pdf = base64.b64encode(pdf_bytes.getvalue()).decode('utf-8')
355
-
356
- # Embed the PDF in an HTML iframe
357
- pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" height="1600" width="680" type="application/pdf"></iframe>'
358
-
359
- # Display the PDF in Streamlit
360
- st.markdown(pdf_display, unsafe_allow_html=True)
361
-
362
-
363
- # Example usage
364
- file_path = "Bhagavad-Gita-As-It-Is.pdf"
365
- text_to_highlight = "Supreme Personality of Godhead"
366
- # display_highlighted_pdf(file_path, text_to_highlight)
367
-
368
-
369
-
370
- # import streamlit as st
371
-
372
- # def display_pdf(pdf_path):
373
- # # Read the PDF file
374
- # with open(pdf_path, "rb") as file:
375
- # pdf_bytes = file.read()
376
-
377
- # # Encode the PDF data as base64
378
- # base64_pdf = base64.b64encode(pdf_bytes).decode("utf-8")
379
-
380
- # # Embed the PDF in an HTML iframe
381
- # pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf"></iframe>'
382
-
383
- # # Display the PDF in Streamlit
384
- # st.markdown(pdf_display, unsafe_allow_html=True)
385
-
386
- # # Example usage
387
- # pdf_path = "../Transformers/Bhagavad-Gita-As-It-Is.pdf"
388
- # display_pdf(pdf_path)
389
-
390
- # import pymupdf
391
-
392
- # # Open the PDF file
393
- # doc = pymupdf.open(file_path)
394
-
395
- # # Get the first page of the PDF
396
- # # page = pdf_file.pa
397
-
398
- # for page in doc: # iterate the document pages
399
- # text_coordinates = page.search_for("Bhagavad", quads=True)
400
- # # Highlight the text
401
- # page.add_highlight_annot(text_coordinates)
402
-
403
- # # Get the text of the page
404
- # text = page.get_text()
405
-
406
- # # Find the text to highlight
407
- # text_to_highlight = "Bhagavad"
408
-
409
- # # Get the coordinates of the text to highlight
410
- # text_coordinates = page.search_for(text_to_highlight)
411
-
412
- # Highlight the text
413
- # page.add_highlight_annot(text_coordinates)
414
-
415
- # Save the PDF file
416
- # doc.save("example_highlighted.pdf")
417
-
418
- import pymupdf
419
  import tempfile
420
  import nltk
421
-
422
- nltk.download('stopwords')
423
  from nltk.corpus import stopwords
424
  from collections import Counter
425
  from streamlit_image_zoom import image_zoom
426
  from PIL import Image
 
427
 
428
- def highlight_pdf(file_path, text_to_highlight, page_numbers):
429
- # Create a temporary file to save the modified PDF
430
- # temp_pdf_path = "temp_highlighted_pdf.pdf"
431
- # Create a temporary file to save the modified PDF
432
- # with tempfile.NamedTemporaryFile(delete=False) as temp_file:
433
- # temp_pdf_path = temp_file.name
434
 
 
435
  # Open the original PDF
436
  doc = pymupdf.open(file_path)
437
-
438
  pages_to_display = [doc.load_page(page_number - 1) for page_number in page_numbers]
439
 
440
  # Tokenize the text into words
441
  words = text_to_highlight.split()
442
 
443
-
444
-
445
  # Remove stopwords
446
  stop_words = set(stopwords.words("english"))
447
  words = [word for word in words if word.lower() not in stop_words]
448
-
449
- print(words)
450
-
451
- # Count the frequency of each word
452
- word_counts = Counter(words)
453
 
454
- # Get the top N most frequent words
455
- # top_words = [word for word, _ in word_counts.most_common(5)]
456
-
457
- # Iterate over each page in the PDF
458
  for page in pages_to_display:
459
-
460
- # Highlight the specified words on the canvas
461
  for word in words:
462
- highlight_rect = page.search_for(word, quads=True)
463
- # Highlight the text
464
- # highlight_rect = pymupdf.Rect(word)
465
- # highlight_annot = page.add_highlight_annot(highlight_rect)
466
- # highlight_annot.set_colors({"stroke": pymupdf.utils.getColor("yellow")})
467
- # highlight_annot.update()
468
- page.add_highlight_annot(highlight_rect)
469
-
470
- # Create a new document with only the specified pages
471
  new_doc = pymupdf.open()
 
 
472
  for page in pages_to_display:
473
  new_doc.insert_pdf(doc, from_page=page.number, to_page=page.number)
 
474
 
475
- # Save the modified PDF
476
- # Save the document to a temporary file
477
  with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
478
  temp_pdf_path = temp_file.name
479
- new_doc.save(temp_pdf_path)
480
-
481
- print(temp_pdf_path)
482
 
483
  new_doc.save("example_highlighted.pdf")
484
 
485
- return temp_pdf_path
486
-
487
- # Example usage
488
 
489
  def pdf_to_images(pdf_path, page_numbers):
490
  doc = pymupdf.open(pdf_path)
491
  images = []
492
- for page_number in range(1, len(page_numbers)+1):
493
  page = doc.load_page(page_number - 1)
494
  pix = page.get_pixmap()
495
  img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
496
  images.append(img)
497
  return images
498
 
499
- # Function to display PDF in Streamlit
500
  def display_highlighted_pdf():
501
  pdf_path = "Bhagavad-Gita-As-It-Is.pdf"
502
- sources = [7,8]
503
- response_text = "I offer my respectful obeisances unto the lotus feet of my spiritual master and unto the feet of all Vaiñëavas. I offer my respectful"
504
-
505
- highlighted_pdf_path = highlight_pdf(file_path=file_path, text_to_highlight=response_text, page_numbers=sources)
506
-
507
- print(highlighted_pdf_path)
508
-
509
- # with open(highlighted_pdf_path, "rb") as file:
510
- # pdf_bytes = file.read()
511
-
512
- # # Use pdf_viewer to display the PDF in Streamlit
513
- # pdf_viewer(pdf_bytes, width=700)
514
- images = pdf_to_images(highlighted_pdf_path, sources)
515
-
516
- for img in images:
517
- if isinstance(img, Image.Image): # Ensure img is a Pillow Image object
518
- image_zoom(img)
519
- else:
520
- st.error("The provided image is not a valid Pillow Image object.")
521
-
522
- display_highlighted_pdf()
523
-
524
-
525
- # import streamlit as st
526
- # import streamlit.components.v1 as components
527
- # path_to_html = "https://vedabase.io/en/library/bg/1/1/"
528
-
529
- # # with open(path_to_html,'r') as f:
530
- # # html_data = f.read()
531
-
532
- # # # Show in webpage
533
- # # st.header("Show an external HTML")
534
- # # st.components.v1.html(html_data)
535
-
536
- # try:
537
- # with open(path_to_html, 'r') as f:
538
- # # Your file processing code here
539
- # html_data = f.read()
540
- # st.header("Show an external HTML")
541
- # st.components.v1.html(html_data)
542
- # except FileNotFoundError:
543
- # print("File not found. Please check the file path.")
544
- # except Exception as e:
545
- # print(f"An error occurred: {e}")
546
-
547
-
548
- # import streamlit as st
549
- # import requests
550
- # from bs4 import BeautifulSoup
551
-
552
- # def fetch_html(url):
553
- # # Fetch the webpage content
554
- # response = requests.get(url)
555
- # if response.status_code == 200:
556
- # return response.content
557
- # else:
558
- # st.error(f"Failed to fetch webpage. Status code: {response.status_code}")
559
-
560
- # def scrape_data(html_content):
561
- # # Parse HTML content
562
- # soup = BeautifulSoup(html_content, "html.parser")
563
-
564
- # # Scrape data (replace this with your specific scraping logic)
565
- # data = soup.find_all(class_="container first-container")
566
-
567
- # return data
568
-
569
- # def main(url):
570
- # st.title("Webpage Scraper")
571
-
572
- # # User input for webpage URL
573
- # url = st.text_input("Enter the URL of the webpage:", value=url)
574
-
575
- # # Convert webpage to HTML and scrape data
576
- # if st.button("Scrape Data"):
577
- # if url:
578
- # html_content = fetch_html(url)
579
- # str_content = """ """
580
- # if html_content:
581
- # data = scrape_data(html_content)
582
- # # st.title("HTML Page Display")
583
- # # st.components.v1.html(html_content, height=1600, width=800)
584
- # # Display scraped data in a new tab
585
- # with st.expander("Scraped Data", expanded=True):
586
- # for item in data:
587
- # # Convert item to string and display
588
- # str_content += str(item)
589
- # # st.write(str(item))
590
- # # st.title("HTML Page Display")
591
- # # st.components.v1.html(data, height=1600, width=800)
592
- # # st.title("HTML Page Display")
593
- # st.components.v1.html(str_content, height=1600, width=680)
594
- # main()
595
-
596
-
597
- # import streamlit as st
598
-
599
- # html_content = """
600
- # <div class="container first-container"> <div class="row"> <div class="col-12 breadcrumb"> <a href="/en/library/">Library</a> »
601
- # <a href="/en/library/bg/">Bhagavad-gītā As It Is</a> »
602
-
603
- # <a href="/en/library/bg/1/">Chapter One</a>
604
-
605
- # </div> </div> <div class="row" id="content" tabindex="-1"> <div class="col-12"> <div class="r r-title r-verse" id="bb181"> <h1>Bg. 1.1</h1> </div> <div class="wrapper-devanagari"> <h2 class="section-title none">Devanagari</h2> <div class="r r-devanagari" id="bb567886">धृतराष्ट्र उवाच<br/>धर्मक्षेत्रे कुरुक्षेत्रे समवेता युयुत्सव: ।<br/>मामका: पाण्डवाश्चैव किमकुर्वत सञ्जय ॥ १ ॥</div> </div> <div class="wrapper-verse-text"> <h2 class="section-title none">Text</h2> <div class="r r-lang-en r-verse-text" id="bb183"><em><em>dhṛtarāṣṭra uvāca</em><br/>dharma-kṣetre kuru-kṣetre<br/>samavetā yuyutsavaḥ<br/>māmakāḥ pāṇḍavāś caiva<br/>kim akurvata sañjaya</em></div> </div> <div class="wrapper-synonyms"> <h2 class="section-title">Synonyms</h2> <div class="r r-lang-en r-synonyms" id="bb184"><p><a href="/en/search/synonyms/?original=dhṛtarāṣṭraḥ"><em>dhṛtarāṣṭraḥ</em></a> <a href="/en/search/synonyms/?original=uvāca"><em>uvāca</em></a> — King Dhṛtarāṣṭra said; <a href="/en/search/synonyms/?original=dharma"><em>dharma</em></a>-<a href="/en/search/synonyms/?original=kṣetre"><em>kṣetre</em></a> — in the place of pilgrimage; <a href="/en/search/synonyms/?original=kuru"><em>kuru</em></a>-<a href="/en/search/synonyms/?original=kṣetre"><em>kṣetre</em></a> — in the place named Kurukṣetra; <a href="/en/search/synonyms/?original=samavetāḥ"><em>samavetāḥ</em></a> — assembled; <a href="/en/search/synonyms/?original=yuyutsavaḥ"><em>yuyutsavaḥ</em></a> — desiring to fight; <a href="/en/search/synonyms/?original=māmakāḥ"><em>māmakāḥ</em></a> — my party (sons); <a href="/en/search/synonyms/?original=pāṇḍavāḥ"><em>pāṇḍavāḥ</em></a> — the sons of Pāṇḍu; <a href="/en/search/synonyms/?original=ca"><em>ca</em></a> — and; <a href="/en/search/synonyms/?original=eva"><em>eva</em></a> — certainly; <a href="/en/search/synonyms/?original=kim"><em>kim</em></a> — what; <a href="/en/search/synonyms/?original=akurvata"><em>akurvata</em></a> — did they do; <a 
href="/en/search/synonyms/?original=sañjaya"><em>sañjaya</em></a> — O Sañjaya.</p></div> </div> <div class="wrapper-translation"> <h2 class="section-title">Translation</h2> <div class="r r-lang-en r-translation" id="bb185"><p><strong>Dhṛtarāṣṭra said: O Sañjaya, after my sons and the sons of Pāṇḍu assembled in the place of pilgrimage at Kurukṣetra, desiring to fight, what did they do?</strong></p></div> </div> <div class="wrapper-puport"> <h2 class="section-title">Purport</h2> <div class="r r-lang-en r-paragraph" id="bb186"><p><em><a href="/en/library/bg/">Bhagavad-gītā</a></em> is the widely read theistic science summarized in the <em>Gītā-māhātmya</em> (<em>Glorification of the Gītā</em>). There it says that one should read <em><a href="/en/library/bg/">Bhagavad-gītā</a></em> very scrutinizingly with the help of a person who is a devotee of Śrī Kṛṣṇa and try to understand it without personally motivated interpretations. The example of clear understanding is there in the <em><a href="/en/library/bg/">Bhagavad-gītā</a></em> itself, in the way the teaching is understood by Arjuna, who heard the <em>Gītā</em> directly from the Lord. If someone is fortunate enough to understand the <em><a href="/en/library/bg/">Bhagavad-gītā</a></em> in that line of disciplic succession, without motivated interpretation, then he surpasses all studies of Vedic wisdom, and all scriptures of the world. One will find in the <em><a href="/en/library/bg/">Bhagavad-gītā</a></em> all that is contained in other scriptures, but the reader will also find things which are not to be found elsewhere. That is the specific standard of the <em>Gītā.</em> It is the perfect theistic science because it is directly spoken by the Supreme Personality of Godhead, Lord Śrī Kṛṣṇa.</p></div> <div class="r r-lang-en r-paragraph" id="bb187"><p>The topics discussed by Dhṛtarāṣṭra and Sañjaya, as described in the <em>Mahābhārata,</em> form the basic principle for this great philosophy. 
It is understood that this philosophy evolved on the Battlefield of Kurukṣetra, which is a sacred place of pilgrimage from the immemorial time of the Vedic age. It was spoken by the Lord when He was present personally on this planet for the guidance of mankind.</p></div> <div class="r r-lang-en r-paragraph" id="bb188"><p>The word <em>dharma-kṣetra</em> (a place where religious rituals are performed) is significant because, on the Battlefield of Kurukṣetra, the Supreme Personality of Godhead was present on the side of Arjuna. Dhṛtarāṣṭra, the father of the Kurus, was highly doubtful about the possibility of his sons’ ultimate victory. In his doubt, he inquired from his secretary Sañjaya, “What did they do?” He was confident that both his sons and the sons of his younger brother Pāṇḍu were assembled in that Field of Kurukṣetra for a determined engagement of the war. Still, his inquiry is significant. He did not want a compromise between the cousins and brothers, and he wanted to be sure of the fate of his sons on the battlefield. Because the battle was arranged to be fought at Kurukṣetra, which is mentioned elsewhere in the <em>Vedas</em> as a place of worship – even for the denizens of heaven – Dhṛtarāṣṭra became very fearful about the influence of the holy place on the outcome of the battle. He knew very well that this would influence Arjuna and the sons of Pāṇḍu favorably, because by nature they were all virtuous. Sañjaya was a student of Vyāsa, and therefore, by the mercy of Vyāsa, Sañjaya was able to envision the Battlefield of Kurukṣetra even while he was in the room of Dhṛtarāṣṭra. And so, Dhṛtarāṣṭra asked him about the situation on the battlefield.</p></div> <div class="r r-lang-en r-paragraph" id="bb189"><p>Both the Pāṇḍavas and the sons of Dhṛtarāṣṭra belong to the same family, but Dhṛtarāṣṭra’s mind is disclosed herein. He deliberately claimed only his sons as Kurus, and he separated the sons of Pāṇḍu from the family heritage. 
One can thus understand the specific position of Dhṛtarāṣṭra in his relationship with his nephews, the sons of Pāṇḍu. As in the paddy field the unnecessary plants are taken out, so it is expected from the very beginning of these topics that in the religious field of Kurukṣetra, where the father of religion, Śrī Kṛṣṇa, was present, the unwanted plants like Dhṛtarāṣṭra’s son Duryodhana and others would be wiped out and the thoroughly religious persons, headed by Yudhiṣṭhira, would be established by the Lord. This is the significance of the words <em>dharma-kṣetre</em> and <em>kuru-kṣetre,</em> apart from their historical and Vedic importance.</p></div> </div> </div> </div> <div class="row d-print-none"> <div class="col-12"> <ul class="mini-pager mt-2 pb-4"> <li class="pager-prev"><a class="btn" href="/en/library/bg/1/"> <i class="fa fa-chevron-left"></i>
606
- # Previous
607
-
608
- # </a></li>
609
-
610
- # <li class="pager-next"><a class="btn" href="/en/library/bg/1/2/">
611
- # Next
612
-
613
- # <i class="fa fa-chevron-right"></i>
614
-
615
- # </a></li>
616
-
617
- # </ul> </div> </div> <nav class="rich-menu" id="para-menu"> <div class="Panel" data-csrf-token="l7dMxBge1IaZDbFchwWzWmh1CBpo6pWDY9LKjwSlqmvpDKld3RfTLY85AWyycbUS" data-language="en" data-propose-category-url="/categorization/suggest-category/" data-reload-url="/en/paragraph-tool/?page_id=14054&amp;view_slug=index_view&amp;view_args=&amp;next=/en/library/bg/1/1/&amp;page_url=/en/library/bg/1/1/" id="panel"> <h1 class="pt-title">Paragraph Tools <a class="pt-close-button" href="#" onclick="event.preventDefault();_menu.close();return false;">Close <i class="fa fa-times"></i></a></h1> Please select paragraph first. </div> <div class="Panel" id="subpanel"> <p>Suggest this as category.</p> <a class="link-yes btn btn-lg btn-success px-3" href="">Yes</a> <a class="link-cancel btn btn-lg btn-success px-3" href="">Cancel</a> </div> </nav> <div class="paragraph-tool-button no-touch none"> <div class="paragraph-tool-button-nav"> <div class="paragraph-tool-button-trigger" data-target="#para-menu" id="para-menu-button"> <i class="icon fa fa-briefcase fa-2x"></i> <span class="fa-stack fa"> <i class="fa fa-circle fa-stack-1x p-counter"></i> <span class="p-counter fa-stack-1x fa-stack-text file-text fa-inverse"></span> </span> </div> </div> </div> </div>
618
- # """
619
-
620
- # st.title("HTML Page Display")
621
- # st.components.v1.html(html_content, height=1600, width=800)
622
-
623
-
624
-
625
- # import requests
626
- # from bs4 import BeautifulSoup
627
-
628
- # # URL of the webpage
629
- # baseurl = "https://vedabase.io/en/library/bg/"
630
-
631
- # # Fetch the webpage content
632
- # response = requests.get(baseurl)
633
- # if response.status_code == 200:
634
- # html_content = response.content
635
-
636
- # # Parse HTML content
637
- # soup = BeautifulSoup(html_content, "html.parser")
638
-
639
- # # Find all direct child div elements with class="r-chapter"
640
- # direct_child_div_elements = soup.select("div.col-12 > div.r-chapter")
641
-
642
- # # List to store the extracted text
643
- # output = []
644
-
645
- # # Iterate over each direct child div element
646
- # for div in direct_child_div_elements:
647
- # # Find the <a> tag within the div
648
- # a_tag = div.find("a")
649
- # if a_tag:
650
- # # Extract the text from the <a> tag and append it to the output list
651
- # output.append(a_tag.text.strip())
652
-
653
- # # Print the output list
654
- # # print(output)
655
- # # print(len(output))
656
-
657
- # ### Link to all chapters
658
- # import re
659
-
660
- # # Sample text
661
- # chapter = output[4]
662
-
663
- # text_to_number = {
664
- # "One": "1",
665
- # "Two": "2",
666
- # "Three": "3",
667
- # "Four": "4",
668
- # "Five": "5",
669
- # "Six": "6",
670
- # "Seven": "7",
671
- # "Eight": "8",
672
- # "Nine": "9",
673
- # "Ten": "10",
674
- # # Add more numbers if needed
675
- # }
676
-
677
- # # Split the text by spaces
678
- # words = chapter.split()
679
-
680
- # # Find the text representing the number
681
- # number_text = words[1].strip(":") # Assuming the number text is the second word
682
-
683
- # # Extract the numeric part
684
- # chapter_number = text_to_number[number_text]
685
-
686
- # # Print the chapter number
687
- # # print(chapter_number)
688
-
689
- # url = baseurl + chapter_number
690
-
691
- # # print(url)
692
-
693
- # ### all Texts in each chapter
694
-
695
- # response = requests.get(url)
696
- # if response.status_code == 200:
697
- # html_content = response.content
698
-
699
- # # Parse HTML content
700
- # text = BeautifulSoup(html_content, "html.parser")
701
-
702
- # # print(text)
703
-
704
- # # Find all direct child div elements with class="r-chapter"
705
- # direct_child_div_elements = text.select("div.col-12 > dl.r.r-verse")
706
-
707
- # # print(direct_child_div_elements)
708
-
709
- # # List to store the extracted text
710
- # text_number = []
711
-
712
- # # Iterate over each direct child div element
713
- # for div in direct_child_div_elements:
714
- # # Find the <a> tag within the div
715
- # a_tag = div.find("a")
716
- # if a_tag:
717
- # # Extract the text from the <a> tag and append it to the output list
718
- # text_number.append(a_tag.text.strip())
719
-
720
- # # Print the output list
721
- # # print(text_number)
722
- # # print(len(text_number))
723
-
724
- # ### link to each Text in each chapter
725
- # text_page = text_number[0]
726
-
727
- # # Split the text by spaces
728
- # words = text_page.split()
729
-
730
- # # Find the text representing the number
731
- # text_number = words[1].strip(":") # Assuming the number text is the second word
732
-
733
- # # print(f"chapter_number - {chapter_number} : text_number - {text_number}")
734
- # texturl = baseurl + chapter_number + "/" + text_number
735
-
736
- # # print(texturl)
737
-
738
- # main(url=texturl)
739
-
740
- # st.title("Display HTML File in Streamlit")
741
-
742
- # # Path to the HTML file
743
- # html_file_path = "../Transformers/Bg. 1.1.html"
744
-
745
- # try:
746
- # # Read the HTML file
747
- # with open(html_file_path, "r", encoding="utf-8") as file:
748
- # html_content = file.read()
749
-
750
- # # Display the HTML content using the 'st.components.v1.html' component
751
- # expanded = st.checkbox("Expand HTML page")
752
- # if expanded:
753
- # st.components.v1.html(html_content, height=1600, width=680)
754
- # else:
755
- # st.components.v1.html(html_content, height=600)
756
- # except FileNotFoundError:
757
- # st.error(f"HTML file '{html_file_path}' not found!")
758
-
759
- # import streamlit as st
760
-
761
-
762
- # st.title("Streamlit Tabs Example")
763
-
764
- # # Add tabs to the sidebar
765
- # tabs = st.sidebar.radio("Navigation", ["Home", "About", "Settings"])
766
-
767
- # # Display different content based on the selected tab
768
- # if tabs == "Home":
769
- # st.header("Home Page")
770
- # st.write("Welcome to the Home page!")
771
-
772
- # elif tabs == "About":
773
- # st.header("About Page")
774
- # st.write("This is the About page.")
775
-
776
- # elif tabs == "Settings":
777
- # st.header("Settings Page")
778
- # st.write("Here you can configure your settings.")
779
-
780
- # st.title("Netflix-like Grid Display")
781
-
782
-
783
-
784
- # import streamlit as st
785
- # import os
786
- # import streamlit.components.v1 as components
787
-
788
- # # Define movie data
789
- # movies = [
790
- # {"title": "Movie 1", "poster_path": "../Transformers/Bg. 1.1.html"},
791
- # {"title": "Movie 2", "poster_url": "https://via.placeholder.com/150"},
792
- # {"title": "Movie 3", "poster_url": "https://via.placeholder.com/150"},
793
- # {"title": "Movie 4", "poster_url": "https://via.placeholder.com/150"},
794
- # {"title": "Movie 5", "poster_url": "https://via.placeholder.com/150"},
795
- # {"title": "Movie 6", "poster_url": "https://via.placeholder.com/150"},
796
- # {"title": "Movie 7", "poster_url": "https://via.placeholder.com/150"},
797
- # {"title": "Movie 8", "poster_url": "https://via.placeholder.com/150"},
798
- # ]
799
-
800
- # Display movies in a grid
801
- # num_columns = 4
802
- # col_count = 0
803
- # cols = st.columns(num_columns)
804
- # expanded = st.checkbox("Expand HTML page")
805
- # for movie in movies:
806
- # with cols[col_count % num_columns]:
807
- # st.markdown(f"<h2>{movie['title']}</h2>", unsafe_allow_html=True)
808
- # st.write(f"Placeholder for {movie['title']}")
809
- # if 'poster_path' in movie:
810
- # # Convert local file path to URL
811
- # poster_url = f"file://{os.path.abspath(movie['poster_path'])}"
812
- # print(poster_url)
813
- # # Display the HTML page using IFrame
814
- # if expanded:
815
- # components.iframe(poster_url, width=800, height=600)
816
- # else:
817
- # components.iframe(poster_url,width=200)
818
- # else:
819
- # # Display placeholder image
820
- # st.image(movie["poster_url"], width=200)
821
- # col_count += 1
822
-
823
-
 
1
+ import pymupdf # PyMuPDF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import tempfile
3
  import nltk
 
 
4
  from nltk.corpus import stopwords
5
  from collections import Counter
6
  from streamlit_image_zoom import image_zoom
7
  from PIL import Image
8
+ import streamlit as st
9
 
10
# Download the NLTK stopword corpus; highlight_pdf reads it via stopwords.words("english").
+ nltk.download('stopwords')
 
 
 
 
 
11
 
12
def highlight_pdf(file_path, text_to_highlight, page_numbers):
    """Highlight the keywords of a text on selected pages of a PDF.

    The text is split on whitespace and English stopwords are dropped. Every
    occurrence of each remaining word is highlighted on the requested pages,
    and those pages are copied, in the order given, into a new PDF that is
    written to a temporary file.

    Args:
        file_path: Path of the source PDF.
        text_to_highlight: Text whose non-stopword words get highlighted.
        page_numbers: 1-based page numbers of the source PDF to keep.

    Returns:
        A ``(temp_pdf_path, new_page_numbers)`` tuple: the path of the
        generated PDF and the 1-based page numbers inside it. The file is
        created with ``delete=False``; the caller must remove it when done.
    """
    stop_words = set(stopwords.words("english"))
    # dict.fromkeys drops repeated words while keeping their order, so the
    # same word is not searched twice and its highlights are not stacked.
    keywords = list(dict.fromkeys(
        word
        for word in text_to_highlight.split()
        if word.lower() not in stop_words
    ))

    new_page_numbers = []
    # Both documents are closed on exit, including when an error is raised.
    with pymupdf.open(file_path) as doc, pymupdf.open() as new_doc:
        pages_to_display = [
            doc.load_page(page_number - 1) for page_number in page_numbers
        ]

        # Annotate the source pages first; insert_pdf below copies the
        # annotations along with the page content.
        for page in pages_to_display:
            for word in keywords:
                for quad in page.search_for(word, quads=True):
                    page.add_highlight_annot(quad)

        for page in pages_to_display:
            new_doc.insert_pdf(doc, from_page=page.number, to_page=page.number)
            # The page just appended is the last one, so the page count is
            # its 1-based number in the new document.
            new_page_numbers.append(new_doc.page_count)

        # Reserve a unique file name, then close the handle before saving:
        # writing by name while the handle is open fails on Windows.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
            temp_pdf_path = temp_file.name
        new_doc.save(temp_pdf_path)

    return temp_pdf_path, new_page_numbers
 
 
47
 
48
def pdf_to_images(pdf_path, page_numbers):
    """Render the requested pages of a PDF as Pillow images.

    Args:
        pdf_path: Path of the PDF to render.
        page_numbers: 1-based page numbers to render, in output order.

    Returns:
        A list of RGB ``PIL.Image.Image`` objects, one per requested page.
    """
    images = []
    # The context manager closes the document even if rendering fails.
    with pymupdf.open(pdf_path) as doc:
        for page_number in page_numbers:
            page = doc.load_page(page_number - 1)
            pix = page.get_pixmap()
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
    return images
57
 
 
58
def display_highlighted_pdf():
    """Render highlighted pages of the Bhagavad-Gita PDF in a two-column grid.

    Highlights the words of a fixed response text on a fixed set of source
    pages, converts the resulting pages to images, and shows them with
    Streamlit, two images per row.
    """
    import os  # local import: only needed for the temp-file cleanup below

    pdf_path = "Bhagavad-Gita-As-It-Is.pdf"
    sources = [7, 8, 18, 20, 40, 66]
    response_text = ("I offer my respectful obeisances unto the lotus feet of my spiritual master "
                     "and unto the feet of all Vaishnavas. I offer my respectful")

    highlighted_pdf_path, new_page_numbers = highlight_pdf(
        file_path=pdf_path,
        text_to_highlight=response_text,
        page_numbers=sources,
    )
    try:
        images = pdf_to_images(highlighted_pdf_path, new_page_numbers)
    finally:
        # highlight_pdf leaves its temporary PDF on disk; remove it once the
        # pages are rendered so repeated runs do not pile up files.
        try:
            os.remove(highlighted_pdf_path)
        except OSError:
            # Best-effort cleanup: a failed delete must not break the page.
            pass

    num_cols = 2  # Number of columns

    # Walk the images one row at a time; the last row may be partially filled.
    for start in range(0, len(images), num_cols):
        cols = st.columns(num_cols)
        for col, img in zip(cols, images[start:start + num_cols]):
            if isinstance(img, Image.Image):
                with col:
                    st.image(img, use_column_width=True)
                    st.write("")  # Add spacing
            else:
                st.error("The provided image is not a valid Pillow Image object.")
86
+
87
# Script entry: build the highlighted pages and render them.
+ display_highlighted_pdf()