Spaces:
Running
on
Zero
Running
on
Zero
cutechicken
committed on
Commit
•
6adfca3
1
Parent(s):
1fb62e7
Update app.py
Browse files
app.py
CHANGED
@@ -193,7 +193,101 @@ def read_uploaded_file(file):
|
|
193 |
return f"โ ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ: {str(e)}", "error"
|
194 |
|
195 |
|
196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
|
199 |
@spaces.GPU
|
|
|
193 |
return f"โ ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ: {str(e)}", "error"
|
194 |
|
195 |
|
196 |
+
# Encodings tried in order when decoding CSV / text uploads. 'latin1' maps
# every byte to a character, so in practice it acts as the last-resort decoder.
_FALLBACK_ENCODINGS = ('utf-8', 'cp949', 'euc-kr', 'latin1')

# NOTE(review): the emoji in the user-facing strings below were unrecoverable
# from the mangled source (only the leading byte survived); they are a
# best-effort restoration -- confirm against the repository copy of app.py.


def _describe_dataframe(df, detailed=False):
    """Build the markdown summary of a DataFrame shown to the user.

    Args:
        df: The loaded pandas DataFrame.
        detailed: When true, also list per-column dtypes and the columns
            that contain missing values (used for CSV uploads).

    Returns:
        The summary as a single string.
    """
    parts = [
        f"📊 데이터 미리보기:\n{df.head(10).to_markdown(index=False)}\n\n",
        "\n📈 데이터 정보:\n",
        f"- 전체 행 수: {len(df)}\n",
        f"- 전체 열 수: {len(df.columns)}\n",
        # Column labels are not guaranteed to be strings (e.g. integer
        # labels), so convert before joining.
        f"- 컬럼 목록: {', '.join(map(str, df.columns))}\n",
    ]
    if detailed:
        parts.append("\n📋 컬럼 데이터 타입:\n")
        parts.extend(f"- {col}: {dtype}\n" for col, dtype in df.dtypes.items())
        null_counts = df.isnull().sum()
        missing = null_counts[null_counts > 0]
        if not missing.empty:
            parts.append("\n⚠️ 결측치:\n")
            parts.extend(
                f"- {col}: {null_count}개 누락\n"
                for col, null_count in missing.items()
            )
    return "".join(parts)


def _analyze_text(content):
    """Return the statistics footer appended to a plain-text or code upload.

    The content is treated as code when its lower-cased form contains any of
    'def ', 'class ', 'import ' or 'function'; otherwise it is treated as
    ordinary text.
    """
    lines = content.split('\n')
    total_lines = len(lines)
    lowered = content.lower()

    if any(keyword in lowered for keyword in ('def ', 'class ', 'import ', 'function')):
        functions = sum(1 for line in lines if 'def ' in line)
        classes = sum(1 for line in lines if 'class ' in line)
        imports = sum(1 for line in lines if 'import ' in line or 'from ' in line)
        return (
            "\n📝 코드 분석:\n"
            f"- 전체 라인 수: {total_lines}\n"
            f"- 함수 수: {functions}\n"
            f"- 클래스 수: {classes}\n"
            f"- import 문 수: {imports}\n"
        )

    non_empty_lines = sum(1 for line in lines if line.strip())
    words = len(content.split())
    chars = len(content)
    return (
        "\n📝 텍스트 분석:\n"
        f"- 전체 라인 수: {total_lines}\n"
        f"- 실제 내용이 있는 라인 수: {non_empty_lines}\n"
        f"- 단어 수: {words}\n"
        f"- 문자 수: {chars}\n"
    )


def read_uploaded_file(file):
    """Read an uploaded file and return a textual summary plus a type tag.

    NOTE(review): the surrounding diff context suggests a function with this
    same name is already defined immediately above this one; if so, this
    definition shadows it -- confirm which one should remain.

    Args:
        file: An object exposing the path of the upload as ``file.name``,
            or ``None`` when nothing was uploaded.

    Returns:
        A ``(content, kind)`` tuple where ``kind`` is one of ``"parquet"``,
        ``"csv"``, ``"text"``, ``"error"``, or ``""`` when ``file`` is None
        (in which case ``content`` is also ``""``). Failures never propagate:
        any exception is reported as ``("파일 읽기 오류: <detail>", "error")``.
    """
    if file is None:
        return "", ""

    try:
        path = file.name
        file_ext = os.path.splitext(path)[1].lower()

        if file_ext == '.parquet':
            return _describe_dataframe(pd.read_parquet(path)), "parquet"

        if file_ext == '.csv':
            for encoding in _FALLBACK_ENCODINGS:
                try:
                    df = pd.read_csv(path, encoding=encoding)
                except UnicodeDecodeError:
                    continue
                return _describe_dataframe(df, detailed=True), "csv"
        else:
            # Anything that is not parquet/CSV is read as text.
            for encoding in _FALLBACK_ENCODINGS:
                try:
                    with open(path, 'r', encoding=encoding) as fh:
                        content = fh.read()
                except UnicodeDecodeError:
                    continue
                return content + _analyze_text(content), "text"

        # UnicodeDecodeError cannot be built from a single message (its
        # constructor takes five arguments), so raise ValueError instead; the
        # handler below turns it into the user-facing error tuple.
        raise ValueError(
            f"지원되는 인코딩으로 파일을 읽을 수 없습니다 ({', '.join(_FALLBACK_ENCODINGS)})"
        )

    except Exception as e:
        # Deliberate catch-all boundary: the caller expects an error tuple,
        # never an exception.
        return f"파일 읽기 오류: {str(e)}", "error"
|
278 |
+
|
279 |
+
# Revised file-upload event handling
|
280 |
+
def init_msg():
    """Return the placeholder text written to the message box on upload.

    Used as the first step of the ``file_upload.change`` chain, so the user
    sees a "file is being analysed" notice before the chat handler runs.

    Returns:
        The Korean status message (restored from the mis-decoded source text).
    """
    return "파일을 분석하고 있습니다..."
|
282 |
+
|
283 |
+
# When the uploaded file changes: first put the placeholder from init_msg()
# into the message box, then run the chat handler with the current UI state.
# NOTE(review): this executes at module level; confirm that file_upload, msg,
# chatbot and the sampling controls are already defined (and that a Blocks
# context is active) at this point in the file.
file_upload.change(
    fn=init_msg,
    outputs=msg,
).then(
    fn=stream_chat,
    inputs=[
        msg,
        chatbot,
        file_upload,
        temperature,
        max_new_tokens,
        top_p,
        top_k,
        penalty,
    ],
    outputs=[msg, chatbot],
)
|
291 |
|
292 |
|
293 |
@spaces.GPU
|