KingNish committed on
Commit
558887c
·
verified ·
1 Parent(s): cc4a32a

Correct reading of length

Browse files
Files changed (1) hide show
  1. app.py +6 -9
app.py CHANGED
@@ -94,9 +94,7 @@ def extract_text_from_pptx(pptx_data, clean=True):
94
  return text, len(text)
95
 
96
  def read_document(file, clean=True):
97
- """Reads content from various document formats."""
98
  file_path = file.name
99
- # No file extension used
100
 
101
  with open(file_path, "rb") as f:
102
  file_content = f.read()
@@ -109,7 +107,6 @@ def read_document(file, clean=True):
109
  mime = kind.mime
110
 
111
  if mime == "application/pdf":
112
- # PDF Handling (unchanged)
113
  try:
114
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_content))
115
  content = ''
@@ -117,11 +114,10 @@ def read_document(file, clean=True):
117
  content += pdf_reader.pages[page].extract_text()
118
  if clean:
119
  content = clean_text(content)
120
- return content, len(content)
121
  except Exception as e:
122
  return f"Error reading PDF: {e}", 0
123
  elif mime == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
124
- # XLSX Handling (unchanged)
125
  try:
126
  wb = load_workbook(io.BytesIO(file_content))
127
  content = ''
@@ -132,7 +128,7 @@ def read_document(file, clean=True):
132
  content += str(cell.value) + ' '
133
  if clean:
134
  content = clean_text(content)
135
- return content, len(content)
136
  except Exception as e:
137
  return f"Error reading XLSX: {e}", 0
138
  elif mime == "text/plain":
@@ -140,7 +136,7 @@ def read_document(file, clean=True):
140
  content = file_content.decode('utf-8')
141
  if clean:
142
  content = clean_text(content)
143
- return content, len(content)
144
  except Exception as e:
145
  return f"Error reading TXT file: {e}", 0
146
  elif mime == "text/csv":
@@ -148,7 +144,7 @@ def read_document(file, clean=True):
148
  content = file_content.decode('utf-8')
149
  if clean:
150
  content = clean_text(content)
151
- return content, len(content)
152
  except Exception as e:
153
  return f"Error reading CSV file: {e}", 0
154
  elif mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
@@ -167,11 +163,12 @@ def read_document(file, clean=True):
167
  content = file_content.decode('utf-8')
168
  if clean:
169
  content = clean_text(content)
170
- return content, len(content)
171
  except Exception as e:
172
  return f"Error reading file: {e}", 0
173
 
174
 
 
175
  # --- Chat Functions ---
176
 
177
  def generate_mistral_response(message):
 
94
  return text, len(text)
95
 
96
  def read_document(file, clean=True):
 
97
  file_path = file.name
 
98
 
99
  with open(file_path, "rb") as f:
100
  file_content = f.read()
 
107
  mime = kind.mime
108
 
109
  if mime == "application/pdf":
 
110
  try:
111
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_content))
112
  content = ''
 
114
  content += pdf_reader.pages[page].extract_text()
115
  if clean:
116
  content = clean_text(content)
117
+ return content, len(repr(content))
118
  except Exception as e:
119
  return f"Error reading PDF: {e}", 0
120
  elif mime == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
 
121
  try:
122
  wb = load_workbook(io.BytesIO(file_content))
123
  content = ''
 
128
  content += str(cell.value) + ' '
129
  if clean:
130
  content = clean_text(content)
131
+ return content, len(repr(content))
132
  except Exception as e:
133
  return f"Error reading XLSX: {e}", 0
134
  elif mime == "text/plain":
 
136
  content = file_content.decode('utf-8')
137
  if clean:
138
  content = clean_text(content)
139
+ return content, len(repr(content))
140
  except Exception as e:
141
  return f"Error reading TXT file: {e}", 0
142
  elif mime == "text/csv":
 
144
  content = file_content.decode('utf-8')
145
  if clean:
146
  content = clean_text(content)
147
+ return content, len(repr(content))
148
  except Exception as e:
149
  return f"Error reading CSV file: {e}", 0
150
  elif mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
 
163
  content = file_content.decode('utf-8')
164
  if clean:
165
  content = clean_text(content)
166
+ return content, len(repr(content))
167
  except Exception as e:
168
  return f"Error reading file: {e}", 0
169
 
170
 
171
+
172
  # --- Chat Functions ---
173
 
174
  def generate_mistral_response(message):