mateoluksenberg committed on
Commit
2a0024c
·
verified ·
1 Parent(s): 49ad315

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -21
app.py CHANGED
@@ -90,31 +90,80 @@ def extract_pptx(path):
90
  return text
91
 
92
 
93
def mode_load(path):
    """Load an uploaded file from *path* and classify it as a document or image.

    The kind is decided purely from the text after the last "." in *path*.

    Returns:
        ("doc", text) with the extracted text cut to its first 5000
        characters, or ("image", img) with the picture converted to RGB.

    Raises:
        gr.Error: if the extension is in neither supported list.
    """
    suffix = path.split(".")[-1]
    print(suffix)

    if suffix in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
        # Only three formats need a dedicated extractor; everything else
        # in the list is read as plain text.
        if suffix.endswith("pdf"):
            text = extract_pdf(path)
        elif suffix.endswith("docx"):
            text = extract_docx(path)
        elif suffix.endswith("pptx"):
            text = extract_pptx(path)
        else:
            text = extract_text(path)
        print(text[:100])
        return "doc", text[:5000]

    if suffix in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
        picture = Image.open(path).convert('RGB')
        return "image", picture

    raise gr.Error("Oops, unsupported files.")
118
 
119
 
120
  @spaces.GPU()
 
90
  return text
91
 
92
 
93
+ # def mode_load(path):
94
+ # choice = ""
95
+ # file_type = path.split(".")[-1]
96
+ # print(file_type)
97
+ # if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
98
+ # if file_type.endswith("pdf"):
99
+ # content = extract_pdf(path)
100
+ # elif file_type.endswith("docx"):
101
+ # content = extract_docx(path)
102
+ # elif file_type.endswith("pptx"):
103
+ # content = extract_pptx(path)
104
+ # else:
105
+ # content = extract_text(path)
106
+ # choice = "doc"
107
+ # print(content[:100])
108
+ # return choice, content[:5000]
109
+
110
 
111
+ # elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
112
+ # content = Image.open(path).convert('RGB')
113
+ # choice = "image"
114
+ # return choice, content
115
 
116
+ # else:
117
+ # raise gr.Error("Oops, unsupported files.")
118
+
119
def mode_load(file_obj):
    """Read an uploaded binary file object and classify it as document or image.

    PDFs are recognised by their ``%PDF`` magic bytes regardless of name.
    Every other type is recognised by the extension of ``file_obj.name``,
    compared case-insensitively.

    Args:
        file_obj: a seekable binary file-like object. A ``name`` attribute is
            needed for anything that is not a PDF.

    Returns:
        ("doc", text) for PDF, DOCX, PPTX and plain-text sources, or
        ("image", img) with the picture converted to RGB.

    Raises:
        ValueError: "Unsupported file type." when the type is not recognised,
            or "Error processing file: ..." (chained to the underlying
            exception) when reading or extraction fails.
    """
    text_suffixes = (".txt", ".py", ".json", ".cpp", ".md")
    image_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".webp")

    # Lower-case once so "REPORT.DOCX" or "photo.JPG" are accepted too; a
    # missing name simply falls through to "unsupported" for non-PDF data.
    name = str(getattr(file_obj, "name", "") or "").lower()

    try:
        # Sniff the magic bytes from the start of the stream, then rewind so
        # whichever reader runs next sees the whole file.
        file_obj.seek(0)
        is_pdf = file_obj.read(4) == b"%PDF"
        file_obj.seek(0)

        if is_pdf:
            return "doc", extract_pdf(file_obj)
        if name.endswith(".docx"):
            return "doc", extract_docx(file_obj)
        if name.endswith(".pptx"):
            return "doc", extract_pptx(file_obj)
        if name.endswith(text_suffixes):
            return "doc", file_obj.read().decode("utf-8", errors="ignore")
        if name.endswith(image_suffixes):
            return "image", Image.open(file_obj).convert("RGB")
    except Exception as e:
        # Keep the single ValueError contract for callers, but preserve the
        # original traceback through exception chaining.
        raise ValueError(f"Error processing file: {str(e)}") from e

    # Raised outside the try block so the message is not re-wrapped as
    # "Error processing file: Unsupported file type.".
    raise ValueError("Unsupported file type.")
167
 
168
 
169
  @spaces.GPU()