StevenChen16 committed · verified
Commit 94f56db · 1 Parent(s): 714189c

Update app.py

Files changed (1):
  1. app.py +21 -14

app.py CHANGED
@@ -1,36 +1,43 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
+from llamafactory.chat import ChatModel
+from llamafactory.extras.misc import torch_gc
 import re
-import spaces
 
 def split_into_sentences(text):
     sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
     sentences = sentence_endings.split(text)
     return [sentence.strip() for sentence in sentences if sentence]
 
-@spaces.GPU
-def process_paragraph(paragraph, model, tokenizer, device, progress=gr.Progress()):
+def process_paragraph(paragraph, progress=gr.Progress()):
     sentences = split_into_sentences(paragraph)
     results = []
     total_sentences = len(sentences)
     for i, sentence in enumerate(sentences):
         progress((i + 1) / total_sentences)
-        inputs = tokenizer(sentence, return_tensors="pt").to(device)  # move the inputs to the device the model is on
-        outputs = model.generate(**inputs, max_new_tokens=300)
-        sentence_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        messages.append({"role": "user", "content": sentence})
+        sentence_response = ""
+        for new_text in chat_model.stream_chat(messages, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=300):
+            sentence_response += new_text.strip()
         category = sentence_response.strip().lower().replace(' ', '_')
         if category != "fair":
             results.append((sentence, category))
         else:
             results.append((sentence, "fair"))
+        messages.append({"role": "assistant", "content": sentence_response})
+        torch_gc()
     return results
 
-# Load the model and tokenizer
-model_name = "StevenChen16/llama3-8b-compliance-review"
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+args = dict(
+    model_name_or_path="princeton-nlp/Llama-3-Instruct-8B-SimPO",  # base Llama-3-8B-Instruct (SimPO) model
+    adapter_name_or_path="StevenChen16/llama3-8b-compliance-review-adapter",  # load the saved LoRA adapter
+    template="llama3",         # same template as used during training
+    finetuning_type="lora",    # same finetuning type as used during training
+    quantization_bit=8,        # load the model with 8-bit quantization
+    use_unsloth=True,          # use UnslothAI's LoRA optimization to speed up generation
+)
+chat_model = ChatModel(args)
+messages = []
 
 # Map category labels to display colors
 label_to_color = {
@@ -56,7 +63,7 @@ with gr.Blocks() as demo:
     progress = gr.Progress()
 
     def on_click(paragraph):
-        results = process_paragraph(paragraph, model, tokenizer, device, progress=progress)
+        results = process_paragraph(paragraph, progress=progress)
         return results
 
     btn.click(on_click, inputs=input_text, outputs=[output])
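
A quick demo of the sentence splitter, which this commit leaves unchanged. The lookbehind-heavy regex is meant to avoid splitting after abbreviations; the sample text below is a made-up example:

import re

def split_into_sentences(text):
    sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
    sentences = sentence_endings.split(text)
    return [sentence.strip() for sentence in sentences if sentence]

# "Dr." and "U.S." survive intact; real sentence endings still split.
print(split_into_sentences("Dr. Smith signed the U.S. lease. The term ends May 1! Any questions?"))
# -> ['Dr. Smith signed the U.S. lease.', 'The term ends May 1!', 'Any questions?']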
 
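One thing to note when reading the new version: messages lives at module scope and process_paragraph only ever appends to it, so conversation history accumulates across paragraphs and is shared by concurrent visitors to the Space. A possible variant, offered as an editorial sketch rather than part of this commit, scopes the history to each sentence; it reuses chat_model, torch_gc, and split_into_sentences exactly as defined in the new app.py:

import gradio as gr

def process_paragraph(paragraph, progress=gr.Progress()):
    # Sketch: same signature as the committed version, but with a fresh,
    # local message list per sentence instead of the shared module-level one.
    sentences = split_into_sentences(paragraph)
    results = []
    for i, sentence in enumerate(sentences):
        progress((i + 1) / len(sentences))
        history = [{"role": "user", "content": sentence}]
        response = ""
        for new_text in chat_model.stream_chat(history, temperature=0.7,
                                               top_p=0.9, top_k=50,
                                               max_new_tokens=300):
            response += new_text  # accumulate raw chunks; strip once at the end
        results.append((sentence, response.strip().lower().replace(' ', '_')))
    torch_gc()  # reclaim cached GPU memory once per paragraph
    return results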
 
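For orientation, since the diff cuts off at label_to_color = {: the (sentence, category) tuples that process_paragraph returns match the list-of-(text, label) format that gr.HighlightedText consumes, so the demo wiring plausibly looks like the sketch below. The dict entries and component labels here are hypothetical; only the "fair" key is confirmed by the code above, and the on_click/btn.click lines are taken from the diff:

import gradio as gr

label_to_color = {
    "fair": "green",       # hypothetical entry; only the "fair" key appears in the code above
    "unfair_term": "red",  # hypothetical entry
}

with gr.Blocks() as demo:
    input_text = gr.Textbox(lines=8, label="Paragraph to review")  # assumed component
    output = gr.HighlightedText(label="Compliance review", color_map=label_to_color)  # assumed component
    btn = gr.Button("Review")  # assumed component

    progress = gr.Progress()

    def on_click(paragraph):
        results = process_paragraph(paragraph, progress=progress)
        return results

    btn.click(on_click, inputs=input_text, outputs=[output])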