Alibrown committed on
Commit
ab8fe05
·
verified ·
1 Parent(s): e4356f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -18
app.py CHANGED
@@ -50,25 +50,33 @@ def process_file(uploaded_file):
50
  return {"type": "text", "content": "".join(page.extract_text() for page in reader.pages if page.extract_text())}
51
 
52
  if file_type == "zip":
53
- with zipfile.ZipFile(uploaded_file) as z:
54
- newline = "\n"
55
- content = f"ZIP Contents:{newline}"
56
-
57
- # Jede Datei im ZIP lesen
58
- for file_info in z.infolist():
59
- if not file_info.is_dir(): # Ignoriere Ordner
60
- try:
61
- with z.open(file_info.filename) as file:
62
- # Nur Textdateien lesen (erweiterbare Whitelist)
63
- if file_info.filename.lower().endswith(('.txt', '.csv', '.py', '.html', '.js', '.css', '.pdf')):
64
- file_content = file.read().decode('utf-8')
65
- content += f"{newline}📁 Datei: {file_info.filename}{newline}{file_content}{newline}"
66
- else:
 
 
 
 
 
 
 
 
67
  content += f"{newline}⚠️ Binärdatei ignoriert: {file_info.filename}{newline}"
68
- except Exception as e:
69
- content += f"{newline}❌ Fehler beim Lesen von {file_info.filename}: {str(e)}{newline}"
70
-
71
- return {"type": "text", "content": content}
72
 
73
  return {"type": "error", "content": "Unsupported file format"}
74
  # Sidebar für Einstellungen
 
50
  return {"type": "text", "content": "".join(page.extract_text() for page in reader.pages if page.extract_text())}
51
 
52
  if file_type == "zip":
53
+ with zipfile.ZipFile(uploaded_file) as z:
54
+ newline = "\n"
55
+ content = f"ZIP Contents:{newline}"
56
+
57
+ # Whitelist für Textdateien erweitern
58
+ text_extensions = ('.txt', '.csv', '.py', '.html', '.js', '.css', '.php', '.json', '.xml')
59
+
60
+ for file_info in z.infolist():
61
+ if not file_info.is_dir():
62
+ try:
63
+ with z.open(file_info.filename) as file:
64
+ # Check 1: Erlaubte Dateiendung
65
+ if file_info.filename.lower().endswith(text_extensions):
66
+ file_content = file.read().decode('utf-8')
67
+ content += f"{newline}📄 {file_info.filename}:{newline}{file_content}{newline}"
68
+ # Check 2: Ist es trotzdem Text? (Magische Bytes prüfen)
69
+ else:
70
+ raw_content = file.read()
71
+ try:
72
+ decoded_content = raw_content.decode('utf-8')
73
+ content += f"{newline}📄 {file_info.filename} (unbekannte Erweiterung):{newline}{decoded_content}{newline}"
74
+ except UnicodeDecodeError:
75
  content += f"{newline}⚠️ Binärdatei ignoriert: {file_info.filename}{newline}"
76
+ except Exception as e:
77
+ content += f"{newline}❌ Fehler bei {file_info.filename}: {str(e)}{newline}"
78
+
79
+ return {"type": "text", "content": content}
80
 
81
  return {"type": "error", "content": "Unsupported file format"}
82
  # Sidebar für Einstellungen