cutechicken committed on
Commit
6adfca3
•
1 Parent(s): 1fb62e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -1
app.py CHANGED
@@ -193,7 +193,101 @@ def read_uploaded_file(file):
193
  return f"โŒ ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}", "error"
194
 
195
 
196
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
 
199
  @spaces.GPU
 
193
  return f"โŒ ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}", "error"
194
 
195
 
196
# Encodings tried in order when decoding CSV / text uploads.  'latin1' maps
# every byte to a character, so it acts as the last-resort fallback.
_FALLBACK_ENCODINGS = ('utf-8', 'cp949', 'euc-kr', 'latin1')


def _unsupported_encoding_error():
    """Build the error raised when none of the fallback encodings can decode a file."""
    # UnicodeDecodeError needs five constructor arguments, so it cannot carry
    # a plain message; ValueError is used instead and is turned into the
    # ("...", "error") result by the caller's generic handler.
    return ValueError(
        f"지원되는 인코딩으로 파일을 읽을 수 없습니다 ({', '.join(_FALLBACK_ENCODINGS)})"
    )


def _summarize_dataframe(df, detailed=False):
    """Return the Markdown preview and summary text for a tabular upload.

    Args:
        df: The loaded pandas DataFrame.
        detailed: When True, also list per-column dtypes and the columns that
            contain missing values (used for CSV uploads).
    """
    # Column labels are not guaranteed to be strings; str() keeps join() safe.
    column_names = ', '.join(str(col) for col in df.columns)
    parts = [
        f"📊 데이터 미리보기:\n{df.head(10).to_markdown(index=False)}\n\n",
        "\n📈 데이터 정보:\n",
        f"- 전체 행 수: {len(df)}\n",
        f"- 전체 열 수: {len(df.columns)}\n",
        f"- 컬럼 목록: {column_names}\n",
    ]
    if detailed:
        parts.append("\n📋 컬럼 데이터 타입:\n")
        parts.extend(f"- {col}: {dtype}\n" for col, dtype in df.dtypes.items())
        null_counts = df.isnull().sum()
        missing = null_counts[null_counts > 0]
        if not missing.empty:
            parts.append("\n⚠️ 결측치:\n")
            parts.extend(
                f"- {col}: {null_count}개 누락\n"
                for col, null_count in missing.items()
            )
    return "".join(parts)


def _analyze_text(content):
    """Return the statistics footer appended to a plain-text or source-code upload."""
    lines = content.split('\n')
    total_lines = len(lines)

    # Heuristic: any of these keywords anywhere in the text marks it as code.
    lowered = content.lower()
    is_code = any(
        keyword in lowered
        for keyword in ('def ', 'class ', 'import ', 'function')
    )

    if is_code:
        functions = sum('def ' in line for line in lines)
        classes = sum('class ' in line for line in lines)
        imports = sum('import ' in line or 'from ' in line for line in lines)
        return (
            "\n📝 코드 분석:\n"
            f"- 전체 라인 수: {total_lines}\n"
            f"- 함수 수: {functions}\n"
            f"- 클래스 수: {classes}\n"
            f"- import 문 수: {imports}\n"
        )

    non_empty_lines = sum(1 for line in lines if line.strip())
    return (
        "\n📝 텍스트 분석:\n"
        f"- 전체 라인 수: {total_lines}\n"
        f"- 실제 내용이 있는 라인 수: {non_empty_lines}\n"
        f"- 단어 수: {len(content.split())}\n"
        f"- 문자 수: {len(content)}\n"
    )


def read_uploaded_file(file):
    """Read an uploaded file and return ``(content, file_type)``.

    Args:
        file: An object exposing the upload's path as ``file.name``, or None.

    Returns:
        ``("", "")`` when ``file`` is None; otherwise a tuple whose second
        item is ``"parquet"``, ``"csv"``, ``"text"`` or ``"error"``.  For
        tabular files the first item is a Markdown preview plus summary; for
        any other extension it is the decoded text followed by line/word (or
        def/class/import) counts.  On failure it is the error message.

    No exception escapes: every failure is reported through the ``"error"``
    result.
    """
    if file is None:
        return "", ""

    try:
        file_ext = os.path.splitext(file.name)[1].lower()

        if file_ext == '.parquet':
            return _summarize_dataframe(pd.read_parquet(file.name)), "parquet"

        if file_ext == '.csv':
            for encoding in _FALLBACK_ENCODINGS:
                try:
                    df = pd.read_csv(file.name, encoding=encoding)
                except UnicodeDecodeError:
                    continue
                return _summarize_dataframe(df, detailed=True), "csv"
            raise _unsupported_encoding_error()

        # Any other extension is treated as a text file.
        for encoding in _FALLBACK_ENCODINGS:
            try:
                with open(file.name, 'r', encoding=encoding) as f:
                    content = f.read()
            except UnicodeDecodeError:
                continue
            return content + _analyze_text(content), "text"
        raise _unsupported_encoding_error()

    except Exception as e:
        # Boundary handler: the UI expects a message, not a traceback.
        return f"파일 읽기 오류: {str(e)}", "error"
278
+
279
# File-upload event handling
def init_msg():
    """Return the status text (Korean: "Analyzing the file...") for the message box."""
    return "파일을 분석하고 있습니다..."
282
+
283
# When the uploaded file changes, first write the text returned by init_msg
# into `msg`, then run `stream_chat` with the current chat state and sampling
# controls, sending its results to `msg` and `chatbot`.
# NOTE(review): `file_upload`, `msg`, `chatbot`, the sampling controls and
# `stream_chat` are defined elsewhere in the file; this statement must run
# after all of them exist — confirm its placement.
file_upload.change(init_msg, outputs=msg).then(
    stream_chat,
    inputs=[
        msg,
        chatbot,
        file_upload,
        temperature,
        max_new_tokens,
        top_p,
        top_k,
        penalty,
    ],
    outputs=[msg, chatbot],
)
291
 
292
 
293
  @spaces.GPU