Nekochu committed
Commit: ca1953e
Parent(s): 7519cb4

Revert back fix2

Files changed (1): app.py (+12 -6)
app.py CHANGED
@@ -13,7 +13,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 DESCRIPTION = """\
 # Nekochu/Luminia-13B-v3
-This Space demonstrates model [Nekochu/Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3) by Nekochu, a Llama 2 model with 13B parameters fine-tuned for SD gen prompt
+This Space demonstrates model Nekochu/Luminia-13B-v3 by Nekochu, a Llama 2 model with 13B parameters fine-tuned for SD gen prompt
 """
 
 LICENSE = """
@@ -21,15 +21,18 @@ LICENSE = """
 ---.
 """
 
+def load_model(model_id):
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer.use_default_system_prompt = False
+    return model, tokenizer
+
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
-
 if torch.cuda.is_available():
     model_id = "Nekochu/Luminia-13B-v3"
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.use_default_system_prompt = False
+    model, tokenizer = load_model(model_id)
 
 
 @spaces.GPU(duration=120)
@@ -37,12 +40,14 @@ def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
+    model_id: str = "Nekochu/Luminia-13B-v3",
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
+    model, tokenizer = load_model(model_id)
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
@@ -81,6 +86,7 @@ chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
         gr.Textbox(label="System prompt", lines=6),
+        gr.Textbox(label="Model ID", placeholder="Nekochu/Luminia-13B-v3"),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
@@ -131,4 +137,4 @@ with gr.Blocks(css="style.css") as demo:
     gr.Markdown(LICENSE)
 
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    demo.queue(max_size=20).launch()
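
A note on the pattern this revision restores: generate() now calls load_model(model_id) on every request, so each chat turn reloads the 13B checkpoint from disk. The sketch below is not part of the commit; it is a minimal illustration of memoizing the loader with functools.lru_cache, assuming the same transformers + bitsandbytes stack the Space already uses (recent transformers releases prefer a BitsAndBytesConfig passed as quantization_config over the bare load_in_4bit=True flag). The name load_model_cached and the maxsize of 2 are illustrative, not part of app.py.

# Sketch only (not from this commit): cache loaded models so repeated
# generate() calls with the same model_id reuse the checkpoint in memory.
from functools import lru_cache

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig


@lru_cache(maxsize=2)  # keep at most two (model, tokenizer) pairs resident
def load_model_cached(model_id: str):
    # Same 4-bit setting the Space uses, expressed via BitsAndBytesConfig.
    quant_config = BitsAndBytesConfig(load_in_4bit=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quant_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False
    return model, tokenizer

Inside generate, model, tokenizer = load_model_cached(model_id) would then hit the cache after the first request for a given Model ID instead of reloading the weights on every call.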