elia-waefler
committed on
Merge remote-tracking branch 'origin/main'
- .github/workflows/check.yml +0 -16
- .github/workflows/hugging_face.yml +3 -0
- Dockerfile +1 -1
- README.md +13 -4
- backend/generate_metadata.py +9 -8
- flake.nix +11 -3
- frontend/.vite/deps_temp_eb58ea19/package.json +3 -0
- frontend/src/components/Features.tsx +1 -1
.github/workflows/check.yml
DELETED
@@ -1,16 +0,0 @@
-name: Check file size
-on:
-  pull_request:
-    branches: [main]
-
-  # to run this workflow manually from the Actions tab
-  workflow_dispatch:
-
-jobs:
-  sync-to-hub:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check large files
-        uses: ActionsDesk/lfs-warning@v2.0
-        with:
-          filesizelimit: 10485760 # this is 10MB
.github/workflows/hugging_face.yml
CHANGED
@@ -13,7 +13,10 @@ jobs:
         with:
           fetch-depth: 0
           lfs: true
+      - name: Navigate to frontend directory
+        run: cd ./frontend
       - name: Push to hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: git push https://AIhackathons:$HF_TOKEN@huggingface.co/spaces/AIhackathons/docverifyrag main
+
Dockerfile
CHANGED
@@ -25,7 +25,7 @@ COPY backend .
 
 # Install backend dependencies
 COPY backend/requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt --vvv
 
 # Stage 3: Serve frontend and backend using nginx and gunicorn
 FROM nginx:latest AS production
README.md
CHANGED
@@ -1,3 +1,15 @@
+
+---
+title: DocVerifyRAG
+emoji: 🐠
+colorFrom: pink
+colorTo: green
+sdk: streamlit
+sdk_version: 1.27.0
+app_file: app.py
+pinned: false
+---
+
 <!-- PROJECT TITLE -->
 <h1 align="center">DocVerifyRAG: Document Verification and Anomaly Detection</h1>
 <div id="header" align="center">
@@ -108,8 +120,6 @@ To deploy DocVerifyRAG using Docker, follow these steps:
 ### Usage
 
 Access the web interface and follow the prompts to upload documents, classify them, and verify metadata. The AI-powered anomaly detection system will automatically flag any discrepancies or errors in the document metadata, providing accurate and reliable document management solutions for hospitals.
-
-
 ## Authors
 
 | Name | Link |
@@ -119,8 +129,7 @@ Access the web interface and follow the prompts to upload documents, classify th
 | Carlos Salgado | [GitHub](https://github.com/salgadev) |
 | Abdul Qadeer | [GitHub](https://github.com/AbdulQadeer-55) |
 
+
 ## License
 
 [![GitLicense](https://img.shields.io/badge/License-MIT-lime.svg)](https://github.com/eliawaefler/DocVerifyRAG/blob/main/LICENSE)
-____
-
backend/generate_metadata.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import io
 import argparse
 import json
 import openai
@@ -12,13 +13,13 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 load_dotenv()
 
 
-
-
-
-    if
-        loader = UnstructuredPDFLoader(
-    elif
-        loader = TextLoader(
+import io
+
+def ingest(file_obj, file_ext='pdf'):
+    if file_ext == 'pdf':
+        loader = UnstructuredPDFLoader(file_obj)
+    elif file_ext == 'txt':
+        loader = TextLoader(file_obj)
     else:
         raise NotImplementedError('Only .txt or .pdf files are supported')
 
@@ -29,7 +30,7 @@ def ingest(file_path)
         "\n\n",
         "\n",
         " ",
-        ",",
+        ",",
         "\uff0c", # Fullwidth comma
         "\u3001", # Ideographic comma
         "\uff0e", # Fullwidth full stop
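
The hunk above changes ingest to take a file object plus an explicit extension instead of inferring the type from a path. A minimal usage sketch, assuming backend/generate_metadata.py is importable and using placeholder file names; the return value is whatever ingest builds downstream, since the diff only shows the loader selection and the splitter separators:

# Hedged sketch of the new ingest(file_obj, file_ext='pdf') signature shown above.
# 'example.pdf' and 'notes.txt' are placeholder inputs, not files from the repository.
from generate_metadata import ingest

pdf_result = ingest('example.pdf', file_ext='pdf')   # dispatches to UnstructuredPDFLoader
txt_result = ingest('notes.txt', file_ext='txt')     # dispatches to TextLoader

try:
    ingest('table.csv', file_ext='csv')              # any other extension hits the else branch
except NotImplementedError as exc:
    print(exc)                                       # 'Only .txt or .pdf files are supported'
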
flake.nix
CHANGED
@@ -14,6 +14,9 @@
     devShells.${system}.default = pkgs.mkShell {
       packages = [
         (pkgs.python311.withPackages (python-pkgs: [
+          python-pkgs.pip # VsCode starts
+          python-pkgs.jupyter
+          python-pkgs.notebook # VsCode ends
           python-pkgs.numpy
           python-pkgs.pandas
           python-pkgs.scipy
@@ -23,15 +26,20 @@
           python-pkgs.langchain
           python-pkgs.langchain-text-splitters
           python-pkgs.unstructured
+          python-pkgs.wrapt # unstructured[local-inference] starts
+          python-pkgs.iso-639
+          python-pkgs.emoji
+          python-pkgs.pillow-heif
+          python-pkgs.magic
+          python-pkgs.poppler-qt5
+          python-pkgs.pytesseract
+          python-pkgs.langdetect # unstructured[local-inference] ends
           python-pkgs.openai
           python-pkgs.pydantic
           python-pkgs.python-dotenv
           python-pkgs.configargparse
           python-pkgs.streamlit
-          python-pkgs.pip
           python-pkgs.lark
-          python-pkgs.jupyter
-          python-pkgs.notebook
           python-pkgs.sentence-transformers
           pkgs.unstructured-api
         ]))
frontend/.vite/deps_temp_eb58ea19/package.json
ADDED
@@ -0,0 +1,3 @@
+{
+  "type": "module"
+}
frontend/src/components/Features.tsx
CHANGED
@@ -58,7 +58,7 @@ export const Features = () => {
         </CardHeader>
         <CardFooter className="flex flex-wrap md:justify-center gap-4">
           <iframe
-            src="https://
+            src="https://aihackathons-docverifyrag.hf.space"
             width="850"
             style={{ border: 'none' }}
             height="750"