openfree committed
Commit 574f73e
1 Parent(s): 18c5cff

Update app.py

Files changed (1)
  1. app.py +66 -45
app.py CHANGED

@@ -1,7 +1,13 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+from transformers import pipeline
+import os
+from typing import List, Tuple, Generator
 import concurrent.futures
 
+# Hugging Face token setup
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # prevent tokenizer warnings
+HF_TOKEN = os.getenv("HF_TOKEN")
+
 # Available LLM models
 LLM_MODELS = {
     "Llama-3.3": "meta-llama/Llama-3.3-70B-Instruct",
@@ -20,24 +26,27 @@ DEFAULT_MODELS = [
     "mistralai/Mistral-Nemo-Instruct-2407"
 ]
 
-clients = {model: InferenceClient(model) for model in LLM_MODELS.values()}
+# Initialize pipelines
+pipes = {}
+for model_name in LLM_MODELS.values():
+    try:
+        pipes[model_name] = pipeline(
+            "text-generation",
+            model=model_name,
+            token=HF_TOKEN,
+            device_map="auto"
+        )
+    except Exception as e:
+        print(f"Failed to load model {model_name}: {str(e)}")
 
-def process_file(file):
+def process_file(file) -> str:
     if file is None:
         return ""
     if file.name.endswith(('.txt', '.md')):
         return file.read().decode('utf-8')
     return f"Uploaded file: {file.name}"
 
-def respond_single(
-    client,
-    message,
-    history,
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def format_messages(message: str, history: List[Tuple[str, str]], system_message: str) -> List[dict]:
     messages = [{"role": "system", "content": system_message}]
 
     for user, assistant in history:
@@ -47,34 +56,50 @@ def respond_single(
         messages.append({"role": "assistant", "content": assistant})
 
     messages.append({"role": "user", "content": message})
-
-    response = ""
+    return messages
+
+def generate_response(
+    pipe,
+    messages: List[dict],
+    max_tokens: int,
+    temperature: float,
+    top_p: float
+) -> Generator[str, None, None]:
     try:
-        for msg in client.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            stream=True,
+        formatted_prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
+
+        response = pipe(
+            formatted_prompt,
+            max_new_tokens=max_tokens,
             temperature=temperature,
             top_p=top_p,
-        ):
-            token = msg.choices[0].delta.content
-            response += token
-            yield response
+            do_sample=True,
+            pad_token_id=50256,
+            num_return_sequences=1,
+            streaming=True
+        )
+
+        generated_text = ""
+        for output in response:
+            new_text = output[0]['generated_text'][len(formatted_prompt):].strip()
+            generated_text = new_text
+            yield generated_text
+
     except Exception as e:
         yield f"Error: {str(e)}"
 
 def respond_all(
-    message,
+    message: str,
     file,
-    history1,
-    history2,
-    history3,
-    selected_models,
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+    history1: List[Tuple[str, str]],
+    history2: List[Tuple[str, str]],
+    history3: List[Tuple[str, str]],
+    selected_models: List[str],
+    system_message: str,
+    max_tokens: int,
+    temperature: float,
+    top_p: float,
+) -> Tuple[Generator[str, None, None], Generator[str, None, None], Generator[str, None, None]]:
     if file:
         file_content = process_file(file)
         message = f"{message}\n\nFile content:\n{file_content}"
@@ -82,21 +107,14 @@ def respond_all(
     while len(selected_models) < 3:
         selected_models.append(selected_models[-1])
 
-    def generate(client, history):
-        return respond_single(
-            client,
-            message,
-            history,
-            system_message,
-            max_tokens,
-            temperature,
-            top_p,
-        )
+    def generate(pipe, history):
+        messages = format_messages(message, history, system_message)
+        return generate_response(pipe, messages, max_tokens, temperature, top_p)
 
     return (
-        generate(clients[selected_models[0]], history1),
-        generate(clients[selected_models[1]], history2),
-        generate(clients[selected_models[2]], history3),
+        generate(pipes[selected_models[0]], history1),
+        generate(pipes[selected_models[1]], history2),
+        generate(pipes[selected_models[2]], history3),
     )
 
 with gr.Blocks() as demo:
@@ -186,4 +204,7 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
+    # Check that the Hugging Face token is set
+    if not HF_TOKEN:
+        print("Warning: HF_TOKEN environment variable is not set")
     demo.launch()
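Note on the new generate_response: as far as I can tell, the transformers text-generation pipeline does not accept a streaming=True keyword, so the call added in this commit would likely be rejected when it is forwarded to generate(), and the pipeline returns completed outputs rather than an iterator of partial ones. Token-by-token streaming with transformers is usually done with TextIteratorStreamer and a background generate() thread. The sketch below is a minimal illustration under that assumption; stream_generate and its arguments are illustrative names, not part of this commit, and model/tokenizer would come from AutoModelForCausalLM.from_pretrained / AutoTokenizer.from_pretrained.

from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(model, tokenizer, prompt, max_new_tokens, temperature, top_p):
    # Encode the prompt and move it to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # The streamer yields decoded text chunks as generate() produces tokens.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # generate() blocks until it finishes, so run it in a background thread
    # and read partial text from the streamer in the foreground.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial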
 
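Separately, when a text-generation pipeline is called without streaming it returns a list of dicts for a single prompt (for example [{"generated_text": ...}]), so iterating over the result and indexing output[0] as in the committed loop does not match that shape; response[0]["generated_text"] is the usual access, and the pipeline's return_full_text=False option avoids slicing the prompt off manually. Also, pad_token_id=50256 is GPT-2's end-of-text id; for Llama or Mistral tokenizers the model's own eos id is the usual choice. A minimal non-streaming sketch under those assumptions; generate_once is an illustrative name, not part of this commit.

def generate_once(pipe, formatted_prompt, max_tokens, temperature, top_p):
    # A "text-generation" pipeline returns a list of dicts for one prompt.
    outputs = pipe(
        formatted_prompt,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        return_full_text=False,                    # do not echo the prompt back
        pad_token_id=pipe.tokenizer.eos_token_id,  # use the model's own eos id
    )
    return outputs[0]["generated_text"].strip()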