elia-waefler committed
Commit 64722c6
2 Parent(s): 95db8de bbe64b5

Merge remote-tracking branch 'origin/main'

.github/workflows/check.yml DELETED
@@ -1,16 +0,0 @@
-name: Check file size
-on:
-  pull_request:
-    branches: [main]
-
-  # to run this workflow manually from the Actions tab
-  workflow_dispatch:
-
-jobs:
-  sync-to-hub:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check large files
-        uses: ActionsDesk/lfs-warning@v2.0
-        with:
-          filesizelimit: 10485760 # this is 10MB
.github/workflows/hugging_face.yml CHANGED
@@ -13,7 +13,10 @@ jobs:
         with:
           fetch-depth: 0
           lfs: true
+      - name: Navigate to frontend directory
+        run: cd ./frontend
       - name: Push to hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: git push https://AIhackathons:$HF_TOKEN@huggingface.co/spaces/AIhackathons/docverifyrag main
+
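Note: in GitHub Actions, each run: step starts a fresh shell at the workspace root, so the added "Navigate to frontend directory" step does not change the working directory of the following "Push to hub" step; the cd would need to live in the same step (or be replaced with a working-directory: setting) to take effect.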
Dockerfile CHANGED
@@ -25,7 +25,7 @@ COPY backend .
 
 # Install backend dependencies
 COPY backend/requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt --vvv
 
 # Stage 3: Serve frontend and backend using nginx and gunicorn
 FROM nginx:latest AS production
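Note: pip's verbosity flag is the short option -v, stackable as -vv/-vvv; --vvv is not a recognized long option, so this RUN instruction will likely fail the build. If extra install logging was the intent, -vvv is the probable spelling.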
README.md CHANGED
@@ -1,3 +1,15 @@
+
+---
+title: DocVerifyRAG
+emoji: 🐠
+colorFrom: pink
+colorTo: green
+sdk: streamlit
+sdk_version: 1.27.0
+app_file: app.py
+pinned: false
+---
+
 <!-- PROJECT TITLE -->
 <h1 align="center">DocVerifyRAG: Document Verification and Anomaly Detection</h1>
 <div id="header" align="center">
@@ -108,8 +120,6 @@ To deploy DocVerifyRAG using Docker, follow these steps:
 ### Usage
 
 Access the web interface and follow the prompts to upload documents, classify them, and verify metadata. The AI-powered anomaly detection system will automatically flag any discrepancies or errors in the document metadata, providing accurate and reliable document management solutions for hospitals.
-
-
 ## Authors
 
 | Name | Link |
@@ -119,8 +129,7 @@ Access the web interface and follow the prompts to upload documents, classify th
 | Carlos Salgado | [GitHub](https://github.com/salgadev) |
 | Abdul Qadeer | [GitHub](https://github.com/AbdulQadeer-55) |
 
+
 ## License
 
 [![GitLicense](https://img.shields.io/badge/License-MIT-lime.svg)](https://github.com/eliawaefler/DocVerifyRAG/blob/main/LICENSE)
-____
-
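Note: Hugging Face Spaces reads this YAML block as the Space configuration and expects it at the very top of README.md, so the blank line added before the opening --- may keep the metadata from being parsed.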
 
 
backend/generate_metadata.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import io
 import argparse
 import json
 import openai
@@ -12,13 +13,13 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 load_dotenv()
 
 
-def ingest(file_path):
-    extension = file_path.split('.')[-1]
-    ext = extension.lower()
-    if ext == 'pdf':
-        loader = UnstructuredPDFLoader(file_path)
-    elif ext == 'txt':
-        loader = TextLoader(file_path)
+import io
+
+def ingest(file_obj, file_ext='pdf'):
+    if file_ext == 'pdf':
+        loader = UnstructuredPDFLoader(file_obj)
+    elif file_ext == 'txt':
+        loader = TextLoader(file_obj)
     else:
         raise NotImplementedError('Only .txt or .pdf files are supported')
 
@@ -29,7 +30,7 @@ def ingest(file_path):
         "\n\n",
         "\n",
         " ",
-        ",",
+        ",",
         "\uff0c",  # Fullwidth comma
         "\u3001",  # Ideographic comma
         "\uff0e",  # Fullwidth full stop
flake.nix CHANGED
@@ -14,6 +14,9 @@
       devShells.${system}.default = pkgs.mkShell {
         packages = [
           (pkgs.python311.withPackages (python-pkgs: [
+            python-pkgs.pip # VsCode starts
+            python-pkgs.jupyter
+            python-pkgs.notebook # VsCode ends
             python-pkgs.numpy
             python-pkgs.pandas
             python-pkgs.scipy
@@ -23,15 +26,20 @@
             python-pkgs.langchain
             python-pkgs.langchain-text-splitters
             python-pkgs.unstructured
+            python-pkgs.wrapt # unstructured[local-inference] starts
+            python-pkgs.iso-639
+            python-pkgs.emoji
+            python-pkgs.pillow-heif
+            python-pkgs.magic
+            python-pkgs.poppler-qt5
+            python-pkgs.pytesseract
+            python-pkgs.langdetect # unstructured[local-inference] ends
             python-pkgs.openai
             python-pkgs.pydantic
             python-pkgs.python-dotenv
             python-pkgs.configargparse
             python-pkgs.streamlit
-            python-pkgs.pip
             python-pkgs.lark
-            python-pkgs.jupyter
-            python-pkgs.notebook
             python-pkgs.sentence-transformers
             pkgs.unstructured-api
           ]))
frontend/.vite/deps_temp_eb58ea19/package.json ADDED
@@ -0,0 +1,3 @@
+{
+  "type": "module"
+}
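Note: frontend/.vite/deps_temp_* directories are temporary caches written by Vite's dependency pre-bundler; committing one is usually unintended, and ignoring .vite/ in the frontend would keep such build artifacts out of future commits.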
frontend/src/components/Features.tsx CHANGED
@@ -58,7 +58,7 @@ export const Features = () => {
           </CardHeader>
           <CardFooter className="flex flex-wrap md:justify-center gap-4">
             <iframe
-              src="https://sandramsc-docverifyrag.hf.space"
+              src="https://aihackathons-docverifyrag.hf.space"
               width="850"
               style={{ border: 'none' }}
               height="750"