MaxBlumenfeld committed
Commit 3e99dbe
1 Parent(s): d21d1ad

replaced app.py with side by side one

Files changed (1)
  1. app.py +119 -37
app.py CHANGED
@@ -2,44 +2,126 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 
-model_id = "MaxBlumenfeld/smollm2-135m-bootleg-instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
-
-def generate_response(message, temperature=0.7, max_length=200):
-    prompt = f"Human: {message}\nAssistant:"
-    inputs = tokenizer(prompt, return_tensors="pt")
-
-    with torch.no_grad():
-        outputs = model.generate(
-            inputs.input_ids,
-            max_length=max_length,
-            temperature=temperature,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
-        )
-
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response.split("Assistant:")[-1].strip()
+# Load base model from HuggingFace and instruction model from local directory
+base_model_id = "HuggingFaceTB/SmolLM2-135M"
+# instruct_model_path = "5930Final/Fine-tuning/smollm2_finetuned/05" # Updated path
+instruct_model_path = "MaxBlumenfeld/smollm2-135m-bootleg-instruct"
 
+
+base_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+# instruct_tokenizer = AutoTokenizer.from_pretrained(instruct_model_path, local_files_only=True)
+instruct_tokenizer = AutoTokenizer.from_pretrained(instruct_model_path)
+
+base_model = AutoModelForCausalLM.from_pretrained(base_model_id)
+# instruct_model = AutoModelForCausalLM.from_pretrained(instruct_model_path, local_files_only=True)
+instruct_model = AutoModelForCausalLM.from_pretrained(instruct_model_path)
+
+
+def generate_response(model, tokenizer, message, temperature=0.5, max_length=200, system_prompt="", is_instruct=False):
+    # Prepare input based on model type
+    if is_instruct:
+        if system_prompt:
+            full_prompt = f"{system_prompt}\n\nHuman: {message}\nAssistant:"
+        else:
+            full_prompt = f"Human: {message}\nAssistant:"
+    else:
+        # For base model, use simpler prompt format
+        full_prompt = message
+
+    inputs = tokenizer(full_prompt, return_tensors="pt")
+
+    with torch.no_grad():
+        outputs = model.generate(
+            inputs.input_ids,
+            max_length=max_length,
+            do_sample=True,
+            temperature=temperature,
+            top_k=50,
+            top_p=0.95,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.eos_token_id # Add padding token
+        )
+
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    if is_instruct:
+        try:
+            response = response.split("Assistant:")[-1].strip()
+        except:
+            pass
+    else:
+        response = response[len(full_prompt):].strip()
+
+    return response
+
+def chat(message, temperature, max_length, system_prompt):
+    # Generate responses from both models
+    base_response = generate_response(
+        base_model,
+        base_tokenizer,
+        message,
+        temperature,
+        max_length,
+        system_prompt,
+        is_instruct=False
+    )
+
+    instruct_response = generate_response(
+        instruct_model,
+        instruct_tokenizer,
+        message,
+        temperature,
+        max_length,
+        system_prompt,
+        is_instruct=True
+    )
+
+    return base_response, instruct_response
+
+# Create Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# SmolLM2 Bootleg Instruct Chat")
-
-    with gr.Row():
-        with gr.Column():
-            message = gr.Textbox(label="Message")
-            temp = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
-            max_len = gr.Slider(minimum=50, maximum=500, value=200, label="Max Length")
-            submit = gr.Button("Send")
-
-        with gr.Column():
-            output = gr.Textbox(label="Response")
-
-    submit.click(
-        generate_response,
-        inputs=[message, temp, max_len],
-        outputs=output
-    )
+    gr.Markdown("# SmolLM2-135M Comparison Demo")
+    gr.Markdown("Compare responses between base and fine-tuned versions of SmolLM2-135M")
+
+    with gr.Row():
+        with gr.Column():
+            message_input = gr.Textbox(label="Input Message")
+            system_prompt = gr.Textbox(
+                label="System Prompt (Optional)",
+                placeholder="Set context or personality for the model",
+                lines=3
+            )
+
+        with gr.Column():
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=2.0,
+                value=0.5,
+                label="Temperature"
+            )
+            max_length = gr.Slider(
+                minimum=50,
+                maximum=500,
+                value=200,
+                step=10,
+                label="Max Length"
+            )
+
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### Base Model Response")
+            base_output = gr.Textbox(label="Base Model (SmolLM2-135M)", lines=5)
+
+        with gr.Column():
+            gr.Markdown("### Bootleg Instruct Model Response")
+            instruct_output = gr.Textbox(label="Fine-tuned Model", lines=5)
+
+    submit_btn = gr.Button("Generate Responses")
+    submit_btn.click(
+        fn=chat,
+        inputs=[message_input, temperature, max_length, system_prompt],
+        outputs=[base_output, instruct_output]
+    )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
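
For reference, the new chat helper added in this commit can also be exercised outside the Gradio UI. A minimal sketch, assuming app.py has already been imported (or run) so that the models, tokenizers, and chat are defined; the prompt and system prompt below are illustrative, not part of the commit:

# Minimal usage sketch (not part of the commit): call chat() directly to get
# both completions without launching the Gradio interface.
base_out, instruct_out = chat(
    message="What is the capital of France?",      # example prompt (illustrative)
    temperature=0.5,
    max_length=200,
    system_prompt="You are a concise assistant."   # example system prompt (illustrative)
)
print("Base model:", base_out)
print("Instruct model:", instruct_out)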