Nekochu committed
Commit b11e705
Parent: 88bb7df

Update app.py

Files changed (1): app.py (+18 -33)
app.py CHANGED

@@ -1,4 +1,3 @@
-import os
 from threading import Thread
 from typing import Iterator
 
@@ -11,9 +10,15 @@ MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
+MODELS = {
+    "Nekochu/Luminia-13B-v3": "Default - Nekochu/Luminia-13B-v3",
+    "Nekochu/Llama-2-13B-German-ORPO": "German ORPO - Nekochu/Llama-2-13B-German-ORPO",
+}
+
 DESCRIPTION = """\
-# Nekochu/Luminia-13B-v3
-This Space demonstrates model Nekochu/Luminia-13B-v3 by Nekochu, a Llama 2 model with 13B parameters fine-tuned for SD gen prompt
+# Text Generation with Selectable Models
+
+This Space demonstrates text generation using different models. Choose a model from the dropdown and experience its creative capabilities!
 """
 
 LICENSE = """
@@ -21,41 +26,26 @@ LICENSE = """
 ---.
 """
 
-def load_model(model_id):
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.use_default_system_prompt = False
-    return model, tokenizer
-
 if not torch.cuda.is_available():
-    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+    DESCRIPTION += "\n<p>Running on CPU This demo does not work on CPU.</p>"
 
-if torch.cuda.is_available():
-    model_id = "Nekochu/Luminia-13B-v3"
-    model, tokenizer = load_model(model_id)
 
-MODELS = [
-    {"name": "Nekochu/Luminia-13B-v3", "id": "Nekochu/Luminia-13B-v3"},
-    {"name": "Nekochu/Llama-2-13B-German-ORPO", "id": "Nekochu/Llama-2-13B-German-ORPO"},
-    # Add more models here in the future
-]
-
-@spaces.GPU(duration=120)
 def generate(
-    model_dropdown: str,
-    custom_model_id: str,
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
+    model_id: str = None,  # Add default value for model_id
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    # Prioritize custom model ID if provided, otherwise use the dropdown selection
-    selected_model_id = custom_model_id if custom_model_id else model_dropdown
-    model, tokenizer = load_model(selected_model_id)
+    if not model_id:
+        raise ValueError("Please select a model from the dropdown.")
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer.use_default_system_prompt = False
 
     conversation = []
     if system_prompt:
@@ -90,25 +80,20 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
 
-model_dropdown = gr.Dropdown(
-    label="Select Predefined Model",
-    choices=[model["name"] for model in MODELS],
-    value=MODELS[0]["name"],  # Default to the first model
-)
-custom_model_id_input = gr.Textbox(label="Or Enter Custom Model ID", placeholder="Enter model ID here")
+
+model_dropdown = gr.Dropdown(label="Select Model", choices=list(MODELS.values()))
 
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
         model_dropdown,
-        custom_model_id_input,
         gr.Textbox(label="System prompt", lines=6),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
             maximum=MAX_MAX_NEW_TOKENS,
             step=1,
-            value=DEFAULT_MAX_NEW_TOKENS,
+            value=DEFAULT_MAX
         ),
         gr.Slider(
             label="Temperature",
 
 