Tonic committed on
Commit
a5b165e
·
1 Parent(s): eea2d66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -19
app.py CHANGED
@@ -27,29 +27,36 @@ from configs import DEFAULT_TERMINATE_MESSAGE, Q1, Q2, Q3, TIMEOUT, TITLE
27
  from custom_widgets import RowAgentWidget
28
  from panel.chat import ChatInterface
29
  from panel.widgets import Button, CodeEditor, PasswordInput, Switch, TextInput
 
 
 
 
 
30
 
31
  def process_file_with_unstructured(file_path):
32
- # Set up the LocalRunner
33
- runner = LocalRunner(
34
- processor_config=ProcessorConfig(
35
- verbose=True,
36
- output_dir="local-ingest-output",
37
- num_processes=2,
38
- ),
39
- read_config=ReadConfig(),
40
- partition_config=PartitionConfig(
41
- partition_by_api=True,
42
- api_key=os.getenv("UNSTRUCTURED_API_KEY"),
43
- ),
44
- )
45
- runner.run(input_path=file_path, recursive=True)
46
- elements = partition(filename=file_path, content_type="application/pdf")
47
- loader = UnstructuredFileLoader(file_path)
48
- docs = loader.load()
49
- raw_text = docs[0].page_content
50
 
51
- return raw_text
 
52
 
 
53
 
54
  pn.extension("codeeditor")
55
  template = pn.template.BootstrapTemplate(title=TITLE)
 
27
  from custom_widgets import RowAgentWidget
28
  from panel.chat import ChatInterface
29
  from panel.widgets import Button, CodeEditor, PasswordInput, Switch, TextInput
30
+ import os
31
+ from langchain.document_loaders import TextLoader, PythonLoader, UnstructuredFileLoader
32
+ from unstructured.ingest.runner import LocalRunner
33
+ from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig, PartitionConfig
34
+ from unstructured.partition.auto import partition
35
 
36
  def process_file_with_unstructured(file_path):
37
+ # Determine the file extension
38
+ _, file_extension = os.path.splitext(file_path)
39
+ file_extension = file_extension.lower()
40
+
41
+ # Initialize the document list
42
+ docs = []
43
+
44
+ # Choose the appropriate loader based on file extension
45
+ if file_extension in ['.txt', '.md', '.html', '.rst']:
46
+ loader = TextLoader(file_path)
47
+ docs = loader.load()
48
+ elif file_extension == '.py':
49
+ loader = PythonLoader(file_path)
50
+ docs = loader.load()
51
+ else:
52
+ # Default to UnstructuredFileLoader for other file types
53
+ loader = UnstructuredFileLoader(file_path)
54
+ docs = loader.load()
55
 
56
+ # Process the loaded documents
57
+ raw_text = "\n".join(doc.text for doc in docs)
58
 
59
+ return raw_text
60
 
61
  pn.extension("codeeditor")
62
  template = pn.template.BootstrapTemplate(title=TITLE)