mudogruer committed
Commit 617afda
1 Parent(s): 22eb52d

Upload app.py

Files changed (1)
  1. app.py +24 -80
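
In short: the threaded TextIteratorStreamer streaming path and the chat-history prompt assembly are replaced by a single synchronous pipeline call, and the gr.ChatInterface chat UI gives way to a plain gr.Interface.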
app.py CHANGED
@@ -7,11 +7,13 @@ Original file is located at
     https://colab.research.google.com/drive/1goHcmXF0Gc4_X9PN-zecV77j9KeI6Dmn
 """
 
-#!pip install -q gradio
-
-#!pip install torch transformers accelerate einops
-
-#!pip install peft
+# !pip install -q -U gradio
+# !pip install -q -U torch transformers accelerate einops
+# !pip install -q peft
+
+
+
+
 
 import gradio as gr
 
@@ -22,7 +24,6 @@ from transformers import (
     TextIteratorStreamer,
     pipeline,
 )
-from threading import Thread
 
 # The huggingface model id for Microsoft's phi-2 model
 # Download and load model and tokenizer
@@ -32,8 +33,8 @@ tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=T
 from peft import PeftModel, PeftConfig
 
 config = PeftConfig.from_pretrained("mudogruer/phi-2-SciQ")
-model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")
-model = PeftModel.from_pretrained(model, "mudogruer/phi-2-SciQ")
+base_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")
+model = PeftModel.from_pretrained(base_model, "mudogruer/phi-2-SciQ")
 
 # Text generation pipeline
 phi2 = pipeline(
@@ -45,79 +46,22 @@ phi2 = pipeline(
     device_map="cpu",
 )
 
-
-# Function that accepts a prompt and generates text using the phi2 pipeline
-def generate(message, chat_history, max_new_tokens):
+def generate(message, max_new_tokens):
     instruction = "You are a helpful assistant to 'User'. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
-    final_prompt = f"Instruction: {instruction}\n"
-
-    for sent, received in chat_history:
-        final_prompt += "User: " + sent + "\n"
-        final_prompt += "Assistant: " + received + "\n"
-
-    final_prompt += "User: " + message + "\n"
-    final_prompt += "Output:"
-
-    # if (
-    #     len(tokenizer.tokenize(final_prompt))
-    #     >= tokenizer.model_max_length - max_new_tokens
-    # ):
-    #     final_prompt = "Instruction: Say 'Input exceeded context size, please clear the chat history and retry!' Output:"
-
-    # Streamer
-    streamer = TextIteratorStreamer(
-        tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0
-    )
-    thread = Thread(
-        target=phi2,
-        kwargs={
-            "text_inputs": final_prompt,
-            "max_new_tokens": max_new_tokens,
-            "streamer": streamer,
-        },
-    )
-    thread.start()
-
-    generated_text = ""
-    for word in streamer:
-        generated_text += word
-        response = generated_text.strip()
-
-        if "User:" in response:
-            response = response.split("User:")[0].strip()
-
-        if "Assistant:" in response:
-            response = response.split("Assistant:")[1].strip()
-
-        yield response
-
-
-# Chat interface with gradio
+    final_prompt = f"Instruction: {instruction}\nUser: {message}\nOutput:"
+
+    # Generate text synchronously
+    response = phi2(final_prompt, max_new_tokens=max_new_tokens)
+    generated_text = response[0]['generated_text']
+
+    # Process to extract the last assistant's response
+    # Assuming the last line after 'Output:' is the response
+    last_response = generated_text.split('Output:')[-1].strip()
+    return last_response
+
+# Update the Gradio interface setup
 with gr.Blocks() as demo:
-    gr.Markdown(
-        """
-        # Phi-2 Scientific Question Chatbot
-        This chatbot was created using Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
-
-        Phi-2 model was fine-tuned with questions including highschool level physics chemistry biology QA using SciQ dataset. In order to reduce the response time on this hardware, `max_new_tokens` has been set to `21` in the text generation pipeline. With this default configuration, it takes approximately `60 seconds` for the response to start being generated, and streamed one word at a time. Use the slider below to increase or decrease the length of the generated text.
-
-        For the safetensor: huggingface.co/mudogruer
-        """
-    )
-
-    tokens_slider = gr.Slider(
-        8,
-        128,
-        value=21,
-        label="Maximum new tokens",
-        info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.",
-    )
-
-    chatbot = gr.ChatInterface(
-        fn=generate,
-        additional_inputs=[tokens_slider],
-        stop_btn=None,
-        examples=[["Who is Leonhard Euler?"]],
-    )
-
-demo.queue().launch()
+    gr.Markdown("""### Phi-2 Scientific Question Chatbot""")
+    tokens_slider = gr.Slider(8, 128, value=21, label="Maximum new tokens")
+    chatbot = gr.Interface(fn=generate, inputs=["text", tokens_slider], outputs="text")
+demo.launch()
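
To try the new synchronous path outside Gradio, the pieces above can be exercised directly. A minimal sketch, assuming the model ids from the diff ("microsoft/phi-2" and "mudogruer/phi-2-SciQ") and a transformers version whose pipeline() accepts a PEFT-wrapped model, as the app itself relies on; the prompt text here is illustrative, not part of the commit:

# Minimal smoke test for the new generate() path (a sketch, not the committed app).
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
base_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")
model = PeftModel.from_pretrained(base_model, "mudogruer/phi-2-SciQ")  # attach the SciQ adapter

# Mirrors the app's pipeline setup, including device_map="cpu"
phi2 = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="cpu")

prompt = "Instruction: You are a helpful assistant.\nUser: Who is Leonhard Euler?\nOutput:"
out = phi2(prompt, max_new_tokens=21)
# Same post-processing as the new generate(): keep only the text after "Output:".
print(out[0]["generated_text"].split("Output:")[-1].strip())

Note that with gr.Interface replacing gr.ChatInterface, chat_history is gone and every question is answered statelessly; the prompt now carries only a single User turn.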