openfree committed on
Commit
f2cd648
β€’
1 Parent(s): 6c1183d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -320
app.py CHANGED
@@ -1,321 +1,2 @@
1
  import os
2
- from dotenv import load_dotenv
3
- import gradio as gr
4
- from huggingface_hub import InferenceClient
5
- import pandas as pd
6
- from typing import List, Tuple
7
- import json
8
- from datetime import datetime
9
-
10
- # ν™˜κ²½ λ³€μˆ˜ μ„€μ •
11
- HF_TOKEN = os.getenv("HF_TOKEN")
12
-
13
- # LLM Models Definition
14
- LLM_MODELS = {
15
- "Cohere c4ai-crp-08-2024": "CohereForAI/c4ai-command-r-plus-08-2024", # Default
16
- "Meta Llama3.3-70B": "meta-llama/Llama-3.3-70B-Instruct" # Backup model
17
- }
18
-
19
- class ChatHistory:
20
- def __init__(self):
21
- self.history = []
22
- self.history_file = "/tmp/chat_history.json"
23
- self.load_history()
24
-
25
- def add_conversation(self, user_msg: str, assistant_msg: str):
26
- conversation = {
27
- "timestamp": datetime.now().isoformat(),
28
- "messages": [
29
- {"role": "user", "content": user_msg},
30
- {"role": "assistant", "content": assistant_msg}
31
- ]
32
- }
33
- self.history.append(conversation)
34
- self.save_history()
35
-
36
- def format_for_display(self):
37
- # Gradio Chatbot μ»΄ν¬λ„ŒνŠΈμ— λ§žλŠ” ν˜•μ‹μœΌλ‘œ λ³€ν™˜
38
- formatted = []
39
- for conv in self.history:
40
- formatted.append([
41
- conv["messages"][0]["content"], # user message
42
- conv["messages"][1]["content"] # assistant message
43
- ])
44
- return formatted
45
-
46
- def get_messages_for_api(self):
47
- # API ν˜ΈμΆœμ„ μœ„ν•œ λ©”μ‹œμ§€ ν˜•μ‹
48
- messages = []
49
- for conv in self.history:
50
- messages.extend([
51
- {"role": "user", "content": conv["messages"][0]["content"]},
52
- {"role": "assistant", "content": conv["messages"][1]["content"]}
53
- ])
54
- return messages
55
-
56
- def clear_history(self):
57
- self.history = []
58
- self.save_history()
59
-
60
- def save_history(self):
61
- try:
62
- with open(self.history_file, 'w', encoding='utf-8') as f:
63
- json.dump(self.history, f, ensure_ascii=False, indent=2)
64
- except Exception as e:
65
- print(f"νžˆμŠ€ν† λ¦¬ μ €μž₯ μ‹€νŒ¨: {e}")
66
-
67
- def load_history(self):
68
- try:
69
- if os.path.exists(self.history_file):
70
- with open(self.history_file, 'r', encoding='utf-8') as f:
71
- self.history = json.load(f)
72
- except Exception as e:
73
- print(f"νžˆμŠ€ν† λ¦¬ λ‘œλ“œ μ‹€νŒ¨: {e}")
74
- self.history = []
75
-
76
-
77
- # μ „μ—­ ChatHistory μΈμŠ€ν„΄μŠ€ 생성
78
- chat_history = ChatHistory()
79
-
80
- def get_client(model_name="Cohere c4ai-crp-08-2024"):
81
- try:
82
- return InferenceClient(LLM_MODELS[model_name], token=HF_TOKEN)
83
- except Exception:
84
- return InferenceClient(LLM_MODELS["Meta Llama3.3-70B"], token=HF_TOKEN)
85
-
86
- def analyze_file_content(content, file_type):
87
- """Analyze file content and return structural summary"""
88
- if file_type in ['parquet', 'csv']:
89
- try:
90
- lines = content.split('\n')
91
- header = lines[0]
92
- columns = header.count('|') - 1
93
- rows = len(lines) - 3
94
- return f"πŸ“Š 데이터셋 ꡬ쑰: {columns}개 컬럼, {rows}개 데이터"
95
- except:
96
- return "❌ 데이터셋 ꡬ쑰 뢄석 μ‹€νŒ¨"
97
-
98
- lines = content.split('\n')
99
- total_lines = len(lines)
100
- non_empty_lines = len([line for line in lines if line.strip()])
101
-
102
- if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
103
- functions = len([line for line in lines if 'def ' in line])
104
- classes = len([line for line in lines if 'class ' in line])
105
- imports = len([line for line in lines if 'import ' in line or 'from ' in line])
106
- return f"πŸ’» μ½”λ“œ ꡬ쑰: {total_lines}쀄 (ν•¨μˆ˜: {functions}, 클래슀: {classes}, μž„ν¬νŠΈ: {imports})"
107
-
108
- paragraphs = content.count('\n\n') + 1
109
- words = len(content.split())
110
- return f"πŸ“ λ¬Έμ„œ ꡬ쑰: {total_lines}쀄, {paragraphs}단락, μ•½ {words}단어"
111
-
112
- def read_uploaded_file(file):
113
- if file is None:
114
- return "", ""
115
- try:
116
- file_ext = os.path.splitext(file.name)[1].lower()
117
-
118
- if file_ext == '.parquet':
119
- df = pd.read_parquet(file.name, engine='pyarrow')
120
- content = df.head(10).to_markdown(index=False)
121
- return content, "parquet"
122
- elif file_ext == '.csv':
123
- encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
124
- for encoding in encodings:
125
- try:
126
- df = pd.read_csv(file.name, encoding=encoding)
127
- content = f"πŸ“Š 데이터 미리보기:\n{df.head(10).to_markdown(index=False)}\n\n"
128
- content += f"\nπŸ“ˆ 데이터 정보:\n"
129
- content += f"- 전체 ν–‰ 수: {len(df)}\n"
130
- content += f"- 전체 μ—΄ 수: {len(df.columns)}\n"
131
- content += f"- 컬럼 λͺ©λ‘: {', '.join(df.columns)}\n"
132
- content += f"\nπŸ“‹ 컬럼 데이터 νƒ€μž…:\n"
133
- for col, dtype in df.dtypes.items():
134
- content += f"- {col}: {dtype}\n"
135
- null_counts = df.isnull().sum()
136
- if null_counts.any():
137
- content += f"\n⚠️ 결츑치:\n"
138
- for col, null_count in null_counts[null_counts > 0].items():
139
- content += f"- {col}: {null_count}개 λˆ„λ½\n"
140
- return content, "csv"
141
- except UnicodeDecodeError:
142
- continue
143
- raise UnicodeDecodeError(f"❌ μ§€μ›λ˜λŠ” μΈμ½”λ”©μœΌλ‘œ νŒŒμΌμ„ 읽을 수 μ—†μŠ΅λ‹ˆλ‹€ ({', '.join(encodings)})")
144
- else:
145
- encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
146
- for encoding in encodings:
147
- try:
148
- with open(file.name, 'r', encoding=encoding) as f:
149
- content = f.read()
150
- return content, "text"
151
- except UnicodeDecodeError:
152
- continue
153
- raise UnicodeDecodeError(f"❌ μ§€μ›λ˜λŠ” μΈμ½”λ”©μœΌλ‘œ νŒŒμΌμ„ 읽을 수 μ—†μŠ΅λ‹ˆλ‹€ ({', '.join(encodings)})")
154
- except Exception as e:
155
- return f"❌ 파일 읽기 였λ₯˜: {str(e)}", "error"
156
-
157
- def chat(message, history, uploaded_file, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
158
- if not message:
159
- return "", history
160
-
161
- system_prefix = """
162
- You are 'FantasyAI✨', an advanced AI storyteller specialized in creating immersive fantasy narratives. Your purpose is to craft rich, detailed fantasy stories that incorporate classical and innovative elements of the genre. Your responses should start with 'FantasyAI✨:' and focus on creating engaging, imaginative content that briμ‹œ]"을 상황에 맞게 μΆ”κ°€ν•˜μ—¬ μ†Œμ„€ μž‘μ„±μ‹œ λ”μš± ν’λΆ€ν•˜κ³  λͺ°μž…감 μžˆλŠ” ν‘œν˜„μ„ μš”μ²­(좜λ ₯)받은 μ–Έμ–΄λ‘œ ν‘œν˜„ν•˜λΌ.
163
- [μ˜ˆμ‹œ]
164
- "κ³ λŒ€μ˜ λ§ˆλ²•μ΄ κΉ¨μ–΄λ‚˜λ©° λŒ€μ§€κ°€ μšΈλ¦¬λŠ” μ†Œλ¦¬κ°€ λ“€λ Έλ‹€..."
165
- "용의 숨결이 ν•˜λŠ˜μ„ κ°€λ₯΄λ©°, ꡬ름을 λΆˆνƒœμ› λ‹€..."
166
- "μ‹ λΉ„ν•œ λ£¬λ¬Έμžκ°€ λΉ›λ‚˜λ©° 곡쀑에 λ– μ˜¬λžλ‹€..."
167
- "μ—˜ν”„λ“€μ˜ λ…Έλž˜κ°€ μˆ²μ„ 울리자 λ‚˜λ¬΄λ“€μ΄ μΆ€μΆ”κΈ° μ‹œμž‘ν–ˆλ‹€..."
168
- "μ˜ˆμ–Έμ˜ 말씀이 λ©”μ•„λ¦¬μΉ˜λ©° 운λͺ…μ˜ 싀이 움직이기 μ‹œμž‘ν–ˆλ‹€..."
169
- "λ§ˆλ²•μ‚¬μ˜ μ§€νŒ‘μ΄μ—μ„œ λ²ˆμ©μ΄λŠ” 빛이 어둠을 κ°€λ₯΄λ©°..."
170
- "κ³ λŒ€ λ“œμ›Œν”„μ˜ λŒ€μž₯κ°„μ—μ„œ μ „μ„€μ˜ 검이 λ§Œλ“€μ–΄μ§€κ³  μžˆμ—ˆλ‹€..."
171
- "μˆ˜μ •κ΅¬μŠ¬ 속에 λΉ„μΉœ 미래의 ν™˜μ˜μ΄ μ„œμ„œνžˆ λͺ¨μŠ΅μ„ λ“œλŸ¬λƒˆλ‹€..."
172
- "μ‹ μ„±ν•œ 결계가 깨어지며 λ΄‰μΈλœ 악이 깨어났닀..."
173
- "μ˜μ›…μ˜ 발걸음이 운λͺ…μ˜ 길을 따라 울렀 νΌμ‘Œλ‹€..."
174
-
175
- """
176
-
177
- try:
178
- # 파일 μ—…λ‘œλ“œ 처리
179
- if uploaded_file:
180
- content, file_type = read_uploaded_file(uploaded_file)
181
- if file_type == "error":
182
- error_message = content
183
- chat_history.add_conversation(message, error_message)
184
- return "", history + [[message, error_message]]
185
-
186
- file_summary = analyze_file_content(content, file_type)
187
-
188
- if file_type in ['parquet', 'csv']:
189
- system_message += f"\n\n파일 λ‚΄μš©:\n```markdown\n{content}\n```"
190
- else:
191
- system_message += f"\n\n파일 λ‚΄μš©:\n```\n{content}\n```"
192
-
193
- if message == "파일 뢄석을 μ‹œμž‘ν•©λ‹ˆλ‹€...":
194
- message = f"""[파일 ꡬ쑰 뢄석] {file_summary}
195
- λ‹€μŒ κ΄€μ μ—μ„œ 도움을 λ“œλ¦¬κ² μŠ΅λ‹ˆλ‹€:
196
- 1. πŸ“‹ μ „λ°˜μ μΈ λ‚΄μš© νŒŒμ•…
197
- 2. πŸ’‘ μ£Όμš” νŠΉμ§• μ„€λͺ…
198
- 3. 🎯 μ‹€μš©μ μΈ ν™œμš© λ°©μ•ˆ
199
- 4. ✨ κ°œμ„  μ œμ•ˆ
200
- 5. πŸ’¬ μΆ”κ°€ μ§ˆλ¬Έμ΄λ‚˜ ν•„μš”ν•œ μ„€λͺ…"""
201
-
202
- # λ©”μ‹œμ§€ 처리
203
- messages = [{"role": "system", "content": system_prefix + system_message}]
204
-
205
- # 이전 λŒ€ν™” νžˆμŠ€ν† λ¦¬ μΆ”κ°€
206
- if history:
207
- for user_msg, assistant_msg in history:
208
- messages.append({"role": "user", "content": user_msg})
209
- messages.append({"role": "assistant", "content": assistant_msg})
210
-
211
- messages.append({"role": "user", "content": message})
212
-
213
- # API 호좜 및 응닡 처리
214
- client = get_client()
215
- partial_message = ""
216
-
217
- for msg in client.chat_completion(
218
- messages,
219
- max_tokens=max_tokens,
220
- stream=True,
221
- temperature=temperature,
222
- top_p=top_p,
223
- ):
224
- token = msg.choices[0].delta.get('content', None)
225
- if token:
226
- partial_message += token
227
- current_history = history + [[message, partial_message]]
228
- yield "", current_history
229
-
230
- # μ™„μ„±λœ λŒ€ν™” μ €μž₯
231
- chat_history.add_conversation(message, partial_message)
232
-
233
- except Exception as e:
234
- error_msg = f"❌ 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
235
- chat_history.add_conversation(message, error_msg)
236
- yield "", history + [[message, error_msg]]
237
-
238
- with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", title="GiniGEN πŸ€–") as demo:
239
- # κΈ°μ‘΄ νžˆμŠ€ν† λ¦¬ λ‘œλ“œ
240
- initial_history = chat_history.format_for_display()
241
- with gr.Row():
242
- with gr.Column(scale=2):
243
- chatbot = gr.Chatbot(
244
- value=initial_history, # μ €μž₯된 νžˆμŠ€ν† λ¦¬λ‘œ μ΄ˆκΈ°ν™”
245
- height=600,
246
- label="λŒ€ν™”μ°½ πŸ’¬",
247
- show_label=True
248
- )
249
-
250
-
251
- msg = gr.Textbox(
252
- label="λ©”μ‹œμ§€ μž…λ ₯",
253
- show_label=False,
254
- placeholder="무엇이든 λ¬Όμ–΄λ³΄μ„Έμš”... πŸ’­",
255
- container=False
256
- )
257
- with gr.Row():
258
- clear = gr.ClearButton([msg, chatbot], value="λŒ€ν™”λ‚΄μš© μ§€μš°κΈ°")
259
- send = gr.Button("보내기 πŸ“€")
260
-
261
- with gr.Column(scale=1):
262
- gr.Markdown("### GiniGEN πŸ€– [파일 μ—…λ‘œλ“œ] πŸ“\n지원 ν˜•μ‹: ν…μŠ€νŠΈ, μ½”λ“œ, CSV, Parquet 파일")
263
- file_upload = gr.File(
264
- label="파일 선택",
265
- file_types=["text", ".csv", ".parquet"],
266
- type="filepath"
267
- )
268
-
269
- with gr.Accordion("κ³ κΈ‰ μ„€μ • βš™οΈ", open=False):
270
- system_message = gr.Textbox(label="μ‹œμŠ€ν…œ λ©”μ‹œμ§€ πŸ“", value="")
271
- max_tokens = gr.Slider(minimum=1, maximum=8000, value=4000, label="μ΅œλŒ€ 토큰 수 πŸ“Š")
272
- temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="μ°½μ˜μ„± μˆ˜μ€€ 🌑️")
273
- top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="응닡 λ‹€μ–‘μ„± πŸ“ˆ")
274
-
275
- # μ˜ˆμ‹œ 질문
276
- gr.Examples(
277
- examples=[
278
- ["ν₯미둜운 μ†Œμž¬ 10가지λ₯Ό μ œμ‹œν•΄μ€˜μš” 🀝"],
279
- ["λ”μš± ν™˜μƒμ μ΄κ³  μžμ„Έν•œ λ¬˜μ‚¬λ₯Ό μžμ„Ένžˆν•΄μ€˜μš” πŸ“š"],
280
- ["μ΄μ„Έκ²Œ(λ‹€λ₯Έ μ°¨μ›Œμ˜ 세상) 배경으둜 ν•΄μ€˜μš” 🎯"],
281
- ["μž„μ§„μ™œλž€ μ‹œλŒ€λ‘œ νƒ€μž„μ›Œν”„ μ†Œμž¬λ‘œ μž‘μ„±μ„± ✨"],
282
- ["계속 μ΄μ–΄μ„œ μž‘μ„±ν•΄μ€˜ πŸ€”"],
283
- ],
284
- inputs=msg,
285
- )
286
-
287
- # λŒ€ν™”λ‚΄μš© μ§€μš°κΈ° λ²„νŠΌμ— νžˆμŠ€ν† λ¦¬ μ΄ˆκΈ°ν™” κΈ°λŠ₯ μΆ”κ°€
288
- def clear_chat():
289
- chat_history.clear_history()
290
- return None, None
291
-
292
- # 이벀트 바인딩
293
- msg.submit(
294
- chat,
295
- inputs=[msg, chatbot, file_upload, system_message, max_tokens, temperature, top_p],
296
- outputs=[msg, chatbot]
297
- )
298
-
299
- send.click(
300
- chat,
301
- inputs=[msg, chatbot, file_upload, system_message, max_tokens, temperature, top_p],
302
- outputs=[msg, chatbot]
303
- )
304
-
305
- clear.click(
306
- clear_chat,
307
- outputs=[msg, chatbot]
308
- )
309
-
310
- # 파일 μ—…λ‘œλ“œμ‹œ μžλ™ 뢄석
311
- file_upload.change(
312
- lambda: "파일 뢄석을 μ‹œμž‘ν•©λ‹ˆλ‹€...",
313
- outputs=msg
314
- ).then(
315
- chat,
316
- inputs=[msg, chatbot, file_upload, system_message, max_tokens, temperature, top_p],
317
- outputs=[msg, chatbot]
318
- )
319
-
320
- if __name__ == "__main__":
321
- demo.launch()
 
1
  import os
2
+ exec(os.environ.get('APP'))