Spaces:
Sleeping
Sleeping
Carlos Salgado
committed on
Commit
•
bbe64b5
1
Parent(s):
b10d0e6
update flake, fix ingest Streamlit compatibility bug
Browse files
- backend/generate_metadata.py +9 -8
- flake.nix +11 -3
backend/generate_metadata.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
import argparse
|
3 |
import json
|
4 |
import openai
|
@@ -12,13 +13,13 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
12 |
load_dotenv()
|
13 |
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
if
|
19 |
-
loader = UnstructuredPDFLoader(
|
20 |
-
elif
|
21 |
-
loader = TextLoader(
|
22 |
else:
|
23 |
raise NotImplementedError('Only .txt or .pdf files are supported')
|
24 |
|
@@ -29,7 +30,7 @@ def ingest(file_path):
|
|
29 |
"\n\n",
|
30 |
"\n",
|
31 |
" ",
|
32 |
-
",",
|
33 |
"\uff0c", # Fullwidth comma
|
34 |
"\u3001", # Ideographic comma
|
35 |
"\uff0e", # Fullwidth full stop
|
|
|
1 |
import os
|
2 |
+
import io
|
3 |
import argparse
|
4 |
import json
|
5 |
import openai
|
|
|
13 |
load_dotenv()
|
14 |
|
15 |
|
16 |
+
import io
|
17 |
+
|
18 |
+
def ingest(file_obj, file_ext='pdf'):
|
19 |
+
if file_ext == 'pdf':
|
20 |
+
loader = UnstructuredPDFLoader(file_obj)
|
21 |
+
elif file_ext == 'txt':
|
22 |
+
loader = TextLoader(file_obj)
|
23 |
else:
|
24 |
raise NotImplementedError('Only .txt or .pdf files are supported')
|
25 |
|
|
|
30 |
"\n\n",
|
31 |
"\n",
|
32 |
" ",
|
33 |
+
",",
|
34 |
"\uff0c", # Fullwidth comma
|
35 |
"\u3001", # Ideographic comma
|
36 |
"\uff0e", # Fullwidth full stop
|
flake.nix
CHANGED
@@ -14,6 +14,9 @@
|
|
14 |
devShells.${system}.default = pkgs.mkShell {
|
15 |
packages = [
|
16 |
(pkgs.python311.withPackages (python-pkgs: [
|
|
|
|
|
|
|
17 |
python-pkgs.numpy
|
18 |
python-pkgs.pandas
|
19 |
python-pkgs.scipy
|
@@ -23,15 +26,20 @@
|
|
23 |
python-pkgs.langchain
|
24 |
python-pkgs.langchain-text-splitters
|
25 |
python-pkgs.unstructured
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
python-pkgs.openai
|
27 |
python-pkgs.pydantic
|
28 |
python-pkgs.python-dotenv
|
29 |
python-pkgs.configargparse
|
30 |
python-pkgs.streamlit
|
31 |
-
python-pkgs.pip
|
32 |
python-pkgs.lark
|
33 |
-
python-pkgs.jupyter
|
34 |
-
python-pkgs.notebook
|
35 |
python-pkgs.sentence-transformers
|
36 |
pkgs.unstructured-api
|
37 |
]))
|
|
|
14 |
devShells.${system}.default = pkgs.mkShell {
|
15 |
packages = [
|
16 |
(pkgs.python311.withPackages (python-pkgs: [
|
17 |
+
python-pkgs.pip # VsCode starts
|
18 |
+
python-pkgs.jupyter
|
19 |
+
python-pkgs.notebook # VsCode ends
|
20 |
python-pkgs.numpy
|
21 |
python-pkgs.pandas
|
22 |
python-pkgs.scipy
|
|
|
26 |
python-pkgs.langchain
|
27 |
python-pkgs.langchain-text-splitters
|
28 |
python-pkgs.unstructured
|
29 |
+
python-pkgs.wrapt # unstructured[local-inference] starts
|
30 |
+
python-pkgs.iso-639
|
31 |
+
python-pkgs.emoji
|
32 |
+
python-pkgs.pillow-heif
|
33 |
+
python-pkgs.magic
|
34 |
+
python-pkgs.poppler-qt5
|
35 |
+
python-pkgs.pytesseract
|
36 |
+
python-pkgs.langdetect # unstructured[local-inference] ends
|
37 |
python-pkgs.openai
|
38 |
python-pkgs.pydantic
|
39 |
python-pkgs.python-dotenv
|
40 |
python-pkgs.configargparse
|
41 |
python-pkgs.streamlit
|
|
|
42 |
python-pkgs.lark
|
|
|
|
|
43 |
python-pkgs.sentence-transformers
|
44 |
pkgs.unstructured-api
|
45 |
]))
|