BruceLee1234 committed on
Commit acca2c1
1 Parent(s): 9803328

Update app.py

Files changed (1)
  1. app.py +42 -22
app.py CHANGED
@@ -1,31 +1,51 @@
+import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Load the HelpingAI2.5-2B model
 model = AutoModelForCausalLM.from_pretrained("OEvortex/HelpingAI2.5-2B")
-# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("OEvortex/HelpingAI2.5-2B")
 
-# Define the chat input
-chat = [
-    { "role": "system", "content": "You are HelpingAI, an emotional AI. Always answer my questions in the HelpingAI style." },
-    { "role": "user", "content": "GIVE ME YOUR INTRO" }
-]
-
-inputs = tokenizer.apply_chat_template(
-    chat,
-    add_generation_prompt=True,
-    return_tensors="pt"
-).to(model.device)
-
-# Generate text
-outputs = model.generate(
-    inputs,
-    max_new_tokens=256,
-    do_sample=True,
-    temperature=0.6,
-    top_p=0.9,
+# Move model to GPU (if available) or CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# Define the function for generating responses
+def generate_response(user_input):
+    # Define the chat input structure
+    chat = [
+        { "role": "system", "content": "You are HelpingAI, an emotional AI. Always answer my questions in the HelpingAI style." },
+        { "role": "user", "content": user_input }
+    ]
+
+    chat_input = ""
+    for message in chat:
+        role = message["role"]
+        content = message["content"]
+        chat_input += f"{role}: {content}\n"
+
+    # Tokenize the input
+    inputs = tokenizer(chat_input, return_tensors="pt").to(device)
+
+    # Generate text
+    outputs = model.generate(
+        inputs["input_ids"],
+        max_new_tokens=256,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+
+    response = outputs[0][inputs["input_ids"].shape[-1]:]
+    return tokenizer.decode(response, skip_special_tokens=True)
+
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=generate_response,
+    inputs="text",
+    outputs="text",
+    live=True
 )
 
-response = outputs[0][inputs.shape[-1]:]
-print(tokenizer.decode(response, skip_special_tokens=True))
+# Launch the Gradio app
+iface.launch()
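
Note: the new generate_response builds the prompt by concatenating "role: content" lines by hand. As a minimal alternative sketch (not part of this commit), the same request could be routed through the tokenizer's chat template, which is the approach the previous revision of app.py used, reusing the model, tokenizer, and device defined above:

# Sketch only: prompt formatting via the tokenizer's chat template instead of
# manual "role: content" concatenation; reuses model, tokenizer, and device
# from app.py above and is not part of the committed change.
chat = [
    {"role": "system", "content": "You are HelpingAI, an emotional AI. Always answer my questions in the HelpingAI style."},
    {"role": "user", "content": "GIVE ME YOUR INTRO"},
]
input_ids = tokenizer.apply_chat_template(
    chat,
    add_generation_prompt=True,
    return_tensors="pt",
).to(device)
outputs = model.generate(
    input_ids,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
# Decode only the newly generated tokens, as generate_response does above.
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))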