Liam Dyer committed on
Commit
d6c1ef6
1 Parent(s): 1fc553f

add filenames because of a gradio client bug

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -86,7 +86,7 @@ def convert_pandoc(input_file, filename) -> str:
86
 
87
 
88
  @spaces.GPU
89
- def convert(input_file) -> str:
90
  plain_text_filetypes = [
91
  ".txt",
92
  ".csv",
@@ -99,14 +99,14 @@ def convert(input_file) -> str:
99
  ".jsonc",
100
  ]
101
  # Already a plain text file that wouldn't benefit from pandoc so return the content
102
- if any(input_file.endswith(ft) for ft in plain_text_filetypes):
103
  with open(input_file, "r") as f:
104
  return f.read()
105
 
106
- if input_file.endswith(".pdf"):
107
  return convert_pdf(input_file)
108
 
109
- return convert_pandoc(input_file, input_file)
110
 
111
 
112
  def chunk_to_length(text, max_length=512):
@@ -119,11 +119,14 @@ def chunk_to_length(text, max_length=512):
119
 
120
 
121
  @spaces.GPU
122
- def predict(queries, documents, max_characters) -> list[list[str]]:
123
  queries = queries.split("\n")
 
124
 
125
- # Conver the documents to text
126
- converted_docs = [convert(doc) for doc in documents]
 
 
127
 
128
  # Return if the total length is less than the max characters
129
  total_doc_lengths = sum([len(doc) for doc in converted_docs])
@@ -193,6 +196,7 @@ gr.Interface(
193
  inputs=[
194
  gr.Textbox(label="Queries separated by newline"),
195
  gr.File(label="Upload File", file_count="multiple"),
 
196
  gr.Number(label="Max output characters", value=16384),
197
  ],
198
  outputs=[gr.JSON(label="Embedded documents")],
 
86
 
87
 
88
  @spaces.GPU
89
+ def convert(input_file, filename) -> str:
90
  plain_text_filetypes = [
91
  ".txt",
92
  ".csv",
 
99
  ".jsonc",
100
  ]
101
  # Already a plain text file that wouldn't benefit from pandoc so return the content
102
+ if any(filename.endswith(ft) for ft in plain_text_filetypes):
103
  with open(input_file, "r") as f:
104
  return f.read()
105
 
106
+ if filename.endswith(".pdf"):
107
  return convert_pdf(input_file)
108
 
109
+ return convert_pandoc(input_file, filename)
110
 
111
 
112
  def chunk_to_length(text, max_length=512):
 
119
 
120
 
121
  @spaces.GPU
122
+ def predict(queries, documents, document_filenames, max_characters) -> list[list[str]]:
123
  queries = queries.split("\n")
124
+ document_filenames = document_filenames.split("\n")
125
 
126
+ # Convert the documents to text
127
+ converted_docs = [
128
+ convert(doc, filename) for doc, filename in zip(documents, document_filenames)
129
+ ]
130
 
131
  # Return if the total length is less than the max characters
132
  total_doc_lengths = sum([len(doc) for doc in converted_docs])
 
196
  inputs=[
197
  gr.Textbox(label="Queries separated by newline"),
198
  gr.File(label="Upload File", file_count="multiple"),
199
+ gr.Textbox(label="Filenames separated by newline"),
200
  gr.Number(label="Max output characters", value=16384),
201
  ],
202
  outputs=[gr.JSON(label="Embedded documents")],