Spaces:

mateoluksenberg
/

chat

Running

App Files Files Community

mateoluksenberg committed on Aug 7, 2024

Commit

2a0024c

verified ·

1 Parent(s): 49ad315

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -21

app.py CHANGED Viewed

@@ -90,31 +90,80 @@ def extract_pptx(path):
     return text
-def mode_load(path):
-    choice = ""
-    file_type = path.split(".")[-1]
-    print(file_type)
-    if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
-        if file_type.endswith("pdf"):
-            content = extract_pdf(path)
-        elif file_type.endswith("docx"):
-            content = extract_docx(path)
-        elif file_type.endswith("pptx"):
-            content = extract_pptx(path)
-        else:
-            content = extract_text(path)
-        choice = "doc"
-        print(content[:100])
-        return choice, content[:5000]
-    elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
-        content = Image.open(path).convert('RGB')
-        choice = "image"
         return choice, content
-    else:
-        raise gr.Error("Oops, unsupported files.")
 @spaces.GPU()

     return text
+# def mode_load(path):
+#     choice = ""
+#     file_type = path.split(".")[-1]
+#     print(file_type)
+#     if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
+#         if file_type.endswith("pdf"):
+#             content = extract_pdf(path)
+#         elif file_type.endswith("docx"):
+#             content = extract_docx(path)
+#         elif file_type.endswith("pptx"):
+#             content = extract_pptx(path)
+#         else:
+#             content = extract_text(path)
+#         choice = "doc"
+#         print(content[:100])
+#         return choice, content[:5000]
+#     elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
+#         content = Image.open(path).convert('RGB')
+#         choice = "image"
+#         return choice, content
+#     else:
+#         raise gr.Error("Oops, unsupported files.")
+def mode_load(file_obj):
+    # Intenta detectar el tipo de archivo basado en su contenido
+    try:
+        file_obj.seek(0)  # Asegúrate de que el puntero esté al inicio del archivo
+        # Verifica si es PDF
+        if file_obj.read(4) == b'%PDF':
+            file_obj.seek(0)  # Vuelve al inicio del archivo para procesar
+            content = extract_pdf(file_obj)
+            choice = "doc"
+        elif file_obj.name.endswith(".docx"):
+            file_obj.seek(0)
+            content = extract_docx(file_obj)
+            choice = "doc"
+        elif file_obj.name.endswith(".pptx"):
+            file_obj.seek(0)
+            content = extract_pptx(file_obj)
+            choice = "doc"
+        elif file_obj.name.endswith(".txt"):
+            file_obj.seek(0)
+            content = file_obj.read().decode('utf-8', errors='ignore')
+            choice = "doc"
+        elif file_obj.name.endswith(".py"):
+            file_obj.seek(0)
+            content = file_obj.read().decode('utf-8', errors='ignore')
+            choice = "doc"
+        elif file_obj.name.endswith(".json"):
+            file_obj.seek(0)
+            content = file_obj.read().decode('utf-8', errors='ignore')
+            choice = "doc"
+        elif file_obj.name.endswith(".cpp"):
+            file_obj.seek(0)
+            content = file_obj.read().decode('utf-8', errors='ignore')
+            choice = "doc"
+        elif file_obj.name.endswith(".md"):
+            file_obj.seek(0)
+            content = file_obj.read().decode('utf-8', errors='ignore')
+            choice = "doc"
+        elif file_obj.name.endswith((".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".webp")):
+            file_obj.seek(0)
+            content = Image.open(file_obj).convert('RGB')
+            choice = "image"
+        else:
+            raise ValueError("Unsupported file type.")
         return choice, content
+    except Exception as e:
+        raise ValueError(f"Error processing file: {str(e)}")
 @spaces.GPU()