Rahul8827 committed on
Commit
72e4a80
1 Parent(s): bf15298

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +21 -0
  2. requirements.txt +67 -0
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ import pytesseract
3
+ import gradio as gr
4
+ from PIL import Image
5
+
6
def pdf_to_text(pdf_file):
    """Return the OCR text of every page of a PDF, concatenated in page order.

    Each page is rendered to an RGB bitmap with PyMuPDF and the bitmap is
    passed to Tesseract through ``pytesseract.image_to_string``.

    Args:
        pdf_file: A filesystem path (``str`` or ``os.PathLike``), or an
            object exposing the path in a ``name`` attribute (for example an
            uploaded temp-file wrapper). ``None`` means "nothing supplied".

    Returns:
        The recognised text of all pages joined together; an empty string
        when ``pdf_file`` is ``None`` or the document has no pages.
    """
    import os

    if pdf_file is None:
        return ""

    # Path-like values are used as-is; anything else is expected to carry its
    # on-disk location in ``.name``.  ``pathlib.Path`` must be checked first
    # because its own ``.name`` is only the final path component.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = pdf_file
    else:
        pdf_path = getattr(pdf_file, "name", pdf_file)

    page_texts = []
    # The context manager closes the document even if rendering or OCR raises.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            # alpha=False keeps the sample buffer at 3 bytes per pixel, which
            # is what the "RGB" mode passed to Image.frombytes expects.
            pix = page.get_pixmap(alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            page_texts.append(pytesseract.image_to_string(img))
    return "".join(page_texts)
15
+
16
def pdf_to_text_interface(pdf_file):
    """Gradio callback: hand the uploaded file to ``pdf_to_text`` and return its text."""
    return pdf_to_text(pdf_file)
19
+
20
# Build the web UI: one file-upload input wired to the OCR callback, with the
# result shown as plain text.
iface = gr.Interface(
    fn=pdf_to_text_interface,
    inputs="file",
    outputs="text",
    title="PDF to Text Converter",
)

# Start serving the interface.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.4
3
+ aiosignal==1.3.1
4
+ altair==5.0.1
5
+ anyio==3.7.1
6
+ async-timeout==4.0.2
7
+ attrs==23.1.0
8
+ certifi==2023.5.7
9
+ charset-normalizer==3.1.0
10
+ click==8.1.3
11
+ colorama==0.4.6
12
+ contourpy==1.1.0
13
+ cycler==0.11.0
14
+ Cython==0.29.36
15
+ fastapi==0.99.1
16
+ ffmpy==0.3.0
17
+ filelock==3.12.2
18
+ fonttools==4.40.0
19
+ frozenlist==1.3.3
20
+ fsspec==2023.6.0
21
+ gradio==3.35.2
22
+ gradio_client==0.2.7
23
+ h11==0.14.0
24
+ httpcore==0.17.3
25
+ httpx==0.24.1
26
+ huggingface-hub==0.16.2
27
+ idna==3.4
28
+ Jinja2==3.1.2
29
+ jsonschema==4.17.3
30
+ kiwisolver==1.4.4
31
+ linkify-it-py==2.0.2
32
+ markdown-it-py==2.2.0
33
+ MarkupSafe==2.1.3
34
+ matplotlib==3.7.2
35
+ mdit-py-plugins==0.3.3
36
+ mdurl==0.1.2
37
+ multidict==6.0.4
38
+ numpy==1.25.0
39
+ orjson==3.9.1
40
+ packaging==23.1
41
+ pandas==2.0.3
42
+ Pillow==10.0.0
43
+ pydantic==1.10.11
44
+ pydub==0.25.1
45
+ Pygments==2.15.1
46
+ PyMuPDF==1.22.5
47
+ pyparsing==3.0.9
48
+ pyrsistent==0.19.3
49
+ pytesseract==0.3.10
50
+ python-dateutil==2.8.2
51
+ python-multipart==0.0.6
52
+ pytz==2023.3
53
+ PyYAML==6.0
54
+ requests==2.31.0
55
+ semantic-version==2.10.0
56
+ six==1.16.0
57
+ sniffio==1.3.0
58
+ starlette==0.27.0
59
+ toolz==0.12.0
60
+ tqdm==4.65.0
61
+ typing_extensions==4.7.1
62
+ tzdata==2023.3
63
+ uc-micro-py==1.0.2
64
+ urllib3==2.0.3
65
+ uvicorn==0.22.0
66
+ websockets==11.0.3
67
+ yarl==1.9.2