nikravan committed on
Commit
8ec3e64
verified
1 Parent(s): 62ebda0

Update app.py

Files changed (1)
  1. app.py +3 -74
app.py CHANGED
@@ -1,59 +1,4 @@
-import os
-import json
-import subprocess
-from threading import Thread
-
-import torch
-import spaces
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
-
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-MODEL_ID = "nikravan/Marco-O1-q4"
-CHAT_TEMPLATE = "ChatML"
-MODEL_NAME = MODEL_ID.split("/")[-1]
-CONTEXT_LENGTH = 16000
-
-# Setting values directly for the variables
-COLOR = "blue"  # Default interface color
-EMOJI = "馃"  # Default emoji for the model
-DESCRIPTION = f"This is the {MODEL_NAME} model designed for testing thinking for general AI tasks."  # Default description
-
-latex_delimiters_set = [{
-    "left": "\\(",
-    "right": "\\)",
-    "display": False
-}, {
-    "left": "\\begin{equation}",
-    "right": "\\end{equation}",
-    "display": True
-}, {
-    "left": "\\begin{align}",
-    "right": "\\end{align}",
-    "display": True
-}, {
-    "left": "\\begin{alignat}",
-    "right": "\\end{alignat}",
-    "display": True
-}, {
-    "left": "\\begin{gather}",
-    "right": "\\end{gather}",
-    "display": True
-}, {
-    "left": "\\begin{CD}",
-    "right": "\\end{CD}",
-    "display": True
-}, {
-    "left": "\\[",
-    "right": "\\]",
-    "display": True
-}]
-
-
-@spaces.GPU()
 def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-    # Format history with a given chat template
     if CHAT_TEMPLATE == "Auto":
         stop_tokens = [tokenizer.eos_token_id]
         instruction = system_prompt + "\n\n"
@@ -102,31 +47,16 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
         outputs.append(new_token)
         if new_token in stop_tokens:
             break
-        yield "".join(outputs)
-
+        result = "".join(outputs)
+        # Wrapping result in Markdown for LaTeX rendering
+        yield f"$$ {result} $$"
 
-# Load model
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
-tokenizer = AutoTokenizer.from_pretrained('AIDC-AI/Marco-o1')
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    device_map="auto",
-    quantization_config=quantization_config,
-    attn_implementation="flash_attention_2",
-)
 
 # Create Gradio interface
 gr.ChatInterface(
     predict,
     title=EMOJI + " " + MODEL_NAME,
     description=DESCRIPTION,
-
-
-
     additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
     additional_inputs=[
         gr.Textbox("You are a code assistant.", label="System prompt"),
@@ -138,4 +68,3 @@ gr.ChatInterface(
     ],
     theme=gr.themes.Soft(primary_hue=COLOR),
 ).queue().launch()
-
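
The context lines of the diff only show the tail of the streaming loop inside predict, so for orientation here is a minimal sketch (not the Space's actual code) of the usual TextIteratorStreamer pattern that loop appears to follow, ending with the $$-wrapped yield this commit introduces. The function name stream_reply, the generation settings, and the model/tokenizer objects are assumptions; only the final three lines mirror what the diff shows.

# Minimal sketch, assuming `model` and `tokenizer` are loaded elsewhere.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(prompt, model, tokenizer, max_new_tokens=256):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() runs in a background thread while the streamer yields decoded text chunks
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens),
    )
    thread.start()
    outputs = []
    for new_token in streamer:
        outputs.append(new_token)
        result = "".join(outputs)
        # $$ ... $$ is a display-math delimiter the Gradio chatbot can render as LaTeX
        yield f"$$ {result} $$"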