hosseinhimself committed
Commit fa5cabf · verified · 1 Parent(s): 8a7fb58

Update app.py

Files changed (1)
  1. app.py +60 -68
app.py CHANGED
@@ -1,81 +1,73 @@
  import gradio as gr
  import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import os
-
- os.system("pip install -U bitsandbytes")
-
- model_name = "hosseinhimself/ISANG-v1.0-8B"
-
- # Ensure CUDA is not used
- torch.set_default_device("cpu")
-
- # Load tokenizer globally
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-
- def load_model():
-     try:
-         model = AutoModelForCausalLM.from_pretrained(
-             model_name,
-             torch_dtype=torch.float32,  # Ensure compatibility with CPU
-             trust_remote_code=True,
-             low_cpu_mem_usage=True  # Optimize memory usage
          )
-         model.to("cpu")  # Explicitly load the model on CPU
-         print("Model loaded successfully on CPU.")
-         return model
      except Exception as e:
-         print(f"Error loading model: {e}")
-         raise
-
-
- def stream_chat(prompt, history):
-     model = load_model()
-     # Add system prompt
-     system_prompt = "You are ISANG, a multilingual large language model made by ISANG AI. You only respond in Persian, Korean, or English. If a user uses one of these languages, reply in the same language."
-
-     # Format the conversation history with system prompt
-     context = f"System: {system_prompt}\n"
-     for user_message, bot_message in history:
-         context += f"User: {user_message}\nBot: {bot_message}\n"
-     context += f"User: {prompt}\nBot:"
-
-     # Generate a response incrementally
-     inputs = tokenizer(context, return_tensors="pt", truncation=True, max_length=512)
-     output_ids = model.generate(
-         **inputs,
-         max_new_tokens=200,
-         temperature=0.7,
-         do_sample=True,
-         return_dict_in_generate=True,
-         output_scores=False
      )
-
-     response_ids = output_ids.sequences[0]
-     decoded_text = tokenizer.decode(response_ids, skip_special_tokens=True)
-
-     # Stream response word by word
-     response = decoded_text[len(context):].strip()
-     words = response.split()
-     history.append((prompt, ""))  # Add the prompt to history with an empty response initially
-     for i, word in enumerate(words):
-         # Append the next word to the history
-         history[-1] = (prompt, " ".join(words[: i + 1]))
-         yield history, " ".join(words[: i + 1])  # Stream the current response
-
- gradio_app = gr.Interface(
-     fn=stream_chat,
-     inputs=[gr.Textbox(lines=2, placeholder="Enter your message here..."), "state"],
-     outputs=["state", "text"],
-     title="ISANG Chatbot",
-     description="This is a chatbot powered by the ISANG model. Enter your messages to chat with it!",
-     examples=[
-         ["سلام، چطوری؟"],
-         ["برام یه داستان تعریف کن"],
-         ["نظرت درباره هوش مصنوعی چیه؟"]
-     ],
-     live=True  # Enable live streaming for Gradio
- )
-
  if __name__ == "__main__":
-     gradio_app.launch()
 
  import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
+
+ # Load the model and tokenizer
+ def load_model():
+     model_name = "hosseinhimself/ISANG-v1.0-8B"
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
+     return tokenizer, model
+
+ tokenizer, model = load_model()
+
+ # Define the inference function
+ def chat(input_text, max_tokens, temperature, history):
+     try:
+         # Concatenate history to the input prompt
+         if history:
+             input_text = history + "\nUser: " + input_text + "\nAssistant:"
+         else:
+             input_text = "User: " + input_text + "\nAssistant:"
+
+         inputs = tokenizer.encode(input_text, return_tensors="pt")
+
+         # Generate the response
+         output = model.generate(
+             inputs,
+             max_length=inputs.shape[1] + int(max_tokens),  # sliders return floats; max_length must be an int
+             temperature=temperature,
+             do_sample=True
          )
+
+         output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+         # Extract the assistant's reply and update history
+         # (rfind, not find: with history present the prompt already contains
+         # earlier "Assistant:" turns, and we want only the newest reply)
+         response_start = output_text.rfind("Assistant:") + len("Assistant:")
+         response = output_text[response_start:].strip()
+         new_history = input_text + response
+
+         return response, new_history
+
      except Exception as e:
+         return str(e), ""
+
+ # Gradio interface
+ def reset_history():
+     return ""
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# ISANG-v1.0-8B Chatbot")
+
+     with gr.Row():
+         with gr.Column():
+             user_input = gr.Textbox(label="Your Input", placeholder="Type your message here...")
+             max_tokens = gr.Slider(minimum=10, maximum=512, value=256, label="Max Tokens")
+             temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature")
+             history = gr.Textbox(label="Conversation History", value="", visible=False)
+             send_button = gr.Button("Send")
+             clear_button = gr.Button("Clear History")
+
+         with gr.Column():
+             chat_output = gr.Textbox(label="Assistant's Response", lines=10)
+
+     send_button.click(
+         chat,
+         inputs=[user_input, max_tokens, temperature, history],
+         outputs=[chat_output, history]
      )
+
+     clear_button.click(reset_history, outputs=[history])

  if __name__ == "__main__":
+     # launch(enable_queue=...) is deprecated/removed in newer Gradio; queue() is the supported equivalent
+     demo.queue().launch(share=True)
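For a quick sanity check, the refactored chat() can be exercised without the UI: conversation state is just the plain-text transcript that each call returns. A minimal sketch, assuming the new file is saved as app.py (importing it loads the 8B model on CPU, so it is slow; the prompts and token counts are illustrative):

# Hypothetical driver script, not part of the commit.
from app import chat

history = ""  # the hidden history Textbox starts empty

# First turn: chat() builds "User: ...\nAssistant:" and generates a reply.
reply, history = chat("Hello!", max_tokens=64, temperature=0.7, history=history)
print(reply)

# Second turn: the returned transcript is passed back in, so the model
# sees the whole User:/Assistant: history.
reply, history = chat("Tell me a story", max_tokens=128, temperature=0.7, history=history)
print(reply)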