mudogruer committed
Commit
893073b
1 Parent(s): 4eb65ab

Upload app.py

Files changed (1)
  1. app.py +123 -0
app.py ADDED
@@ -0,0 +1,123 @@
+ # -*- coding: utf-8 -*-
+ """Untitled12.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1goHcmXF0Gc4_X9PN-zecV77j9KeI6Dmn
+ """
+
+ #!pip install -q gradio
+
+ #!pip install torch transformers accelerate einops
+
+ #!pip install peft
+
+ import gradio as gr
+
+ import torch
+ from transformers import (
+     AutoTokenizer,
+     AutoModelForCausalLM,
+     TextIteratorStreamer,
+     pipeline,
+ )
+ from threading import Thread
+
+ # Download and load the tokenizer for Microsoft's phi-2 model
+ # (Hugging Face model id: microsoft/phi-2)
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
+
+ # Download the safetensors adapter of the fine-tuned Phi-2 model
+ from peft import PeftModel, PeftConfig
+
+ config = PeftConfig.from_pretrained("mudogruer/phi-2-SciQ")
+ model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")
+ model = PeftModel.from_pretrained(model, "mudogruer/phi-2-SciQ")
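+ # (Illustrative sketch, not part of the original app: if the adapter is a
+ # LoRA adapter, PeftModel.merge_and_unload() folds its weights into the
+ # base model, which can speed up CPU inference.)
+ # model = model.merge_and_unload()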
+
+ # Text generation pipeline
+ phi2 = pipeline(
+     "text-generation",
+     tokenizer=tokenizer,
+     model=model,
+     pad_token_id=tokenizer.eos_token_id,
+     eos_token_id=tokenizer.eos_token_id,
+     device_map="cpu",
+ )
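+ # (Illustrative, not part of the original app: a one-off, non-streaming
+ # call to the pipeline would look like this; the prompt format mirrors the
+ # one assembled in generate() below.)
+ # out = phi2("Instruction: Answer briefly.\nUser: What is osmosis?\nOutput:",
+ #            max_new_tokens=21)
+ # print(out[0]["generated_text"])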
+
+
+ # Function that accepts a message, the chat history, and a max_new_tokens
+ # value, and streams generated text using the phi2 pipeline
+ def generate(message, chat_history, max_new_tokens):
+     instruction = "You are a helpful assistant to 'User'. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
+     final_prompt = f"Instruction: {instruction}\n"
+
+     for sent, received in chat_history:
+         final_prompt += "User: " + sent + "\n"
+         final_prompt += "Assistant: " + received + "\n"
+
+     final_prompt += "User: " + message + "\n"
+     final_prompt += "Output:"
+
+     # Guard against overflowing the model's context window, leaving room
+     # for the tokens about to be generated
+     if (
+         len(tokenizer.tokenize(final_prompt))
+         >= tokenizer.model_max_length - max_new_tokens
+     ):
+         final_prompt = "Instruction: Say 'Input exceeded context size, please clear the chat history and retry!' Output:"
+
+     # Streamer that yields decoded tokens as they are produced
+     streamer = TextIteratorStreamer(
+         tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0
+     )
+     # Run the blocking pipeline call in a background thread so that this
+     # function can consume the streamer's output concurrently
+     thread = Thread(
+         target=phi2,
+         kwargs={
+             "text_inputs": final_prompt,
+             "max_new_tokens": max_new_tokens,
+             "streamer": streamer,
+         },
+     )
+     thread.start()
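+     # (Note: the thread is not joined; iterating the streamer below ends
+     # once generation finishes, and the worker thread then exits on its own.)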
+
+     generated_text = ""
+     for word in streamer:
+         generated_text += word
+         response = generated_text.strip()
+
+         if "User:" in response:
+             response = response.split("User:")[0].strip()
+
+         if "Assistant:" in response:
+             response = response.split("Assistant:")[1].strip()
+
+         yield response
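+     # (Note: yielding successively longer strings from a generator is what
+     # lets gr.ChatInterface stream the partial response to the UI.)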
+
+
+ # Chat interface with gradio
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+     # Phi-2 Scientific Question Chatbot
+     This chatbot was created using Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
+
+     The Phi-2 model was fine-tuned on physics, chemistry, and biology questions from the SciQ dataset. To reduce the response time on this hardware, `max_new_tokens` has been set to `21` in the text generation pipeline. With this default configuration, it takes approximately `60 seconds` for the response to start being generated, after which it is streamed one word at a time. Use the slider below to increase or decrease the length of the generated text.
+
+     For the safetensors adapter, see: huggingface.co/mudogruer
+     """
+     )
+
+     tokens_slider = gr.Slider(
+         8,
+         128,
+         value=21,
+         label="Maximum new tokens",
+         info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.",
+     )
+
+     chatbot = gr.ChatInterface(
+         fn=generate,
+         additional_inputs=[tokens_slider],
+         stop_btn=None,
+         examples=[["Who is Leonhard Euler?"]],
+     )
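+     # (Note: gr.ChatInterface passes each value in additional_inputs to fn
+     # after the message and history, so the slider value arrives as
+     # generate()'s max_new_tokens argument.)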
+
+ demo.queue().launch()
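
A minimal sketch of querying the running app programmatically with gradio_client (the Space id "mudogruer/phi-2-SciQ" below is an assumption, and "/chat" is the endpoint name gr.ChatInterface registers by default):

    # pip install gradio_client
    from gradio_client import Client

    # Hypothetical Space id; replace with the Space actually hosting this app.py
    client = Client("mudogruer/phi-2-SciQ")

    # 21 matches the default "Maximum new tokens" slider value
    reply = client.predict("Who is Leonhard Euler?", 21, api_name="/chat")
    print(reply)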