StevenChen16 committed · verified
Commit 94f56db · 1 Parent(s): 714189c

Update app.py

Files changed (1):
  1. app.py +21 -14

app.py CHANGED
@@ -1,36 +1,43 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
+from llamafactory.chat import ChatModel
+from llamafactory.extras.misc import torch_gc
 import re
-import spaces
 
 def split_into_sentences(text):
     sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
     sentences = sentence_endings.split(text)
     return [sentence.strip() for sentence in sentences if sentence]
 
-@spaces.GPU
-def process_paragraph(paragraph, model, tokenizer, device, progress=gr.Progress()):
+def process_paragraph(paragraph, progress=gr.Progress()):
     sentences = split_into_sentences(paragraph)
     results = []
     total_sentences = len(sentences)
     for i, sentence in enumerate(sentences):
         progress((i + 1) / total_sentences)
-        inputs = tokenizer(sentence, return_tensors="pt").to(device)  # move the inputs to the device the model is on
-        outputs = model.generate(**inputs, max_new_tokens=300)
-        sentence_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        messages.append({"role": "user", "content": sentence})
+        sentence_response = ""
+        for new_text in chat_model.stream_chat(messages, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=300):
+            sentence_response += new_text.strip()
         category = sentence_response.strip().lower().replace(' ', '_')
         if category != "fair":
             results.append((sentence, category))
         else:
             results.append((sentence, "fair"))
+        messages.append({"role": "assistant", "content": sentence_response})
+        torch_gc()
     return results
 
-# Load the model and tokenizer
-model_name = "StevenChen16/llama3-8b-compliance-review"
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+args = dict(
+    model_name_or_path="princeton-nlp/Llama-3-Instruct-8B-SimPO",  # base Llama-3-8B-Instruct (SimPO) model
+    adapter_name_or_path="StevenChen16/llama3-8b-compliance-review-adapter",  # load the saved LoRA adapter
+    template="llama3",         # same template as used during training
+    finetuning_type="lora",    # same finetuning type as used during training
+    quantization_bit=8,        # load the model with 8-bit quantization
+    use_unsloth=True,          # use UnslothAI's LoRA optimization to speed up generation
+)
+chat_model = ChatModel(args)
+messages = []
 
 # Map category labels to display colors
 label_to_color = {
@@ -56,7 +63,7 @@ with gr.Blocks() as demo:
     progress = gr.Progress()
 
     def on_click(paragraph):
-        results = process_paragraph(paragraph, model, tokenizer, device, progress=progress)
+        results = process_paragraph(paragraph, progress=progress)
         return results
 
     btn.click(on_click, inputs=input_text, outputs=[output])
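
A quick demo of the sentence splitter, which this commit leaves unchanged. The lookbehind-heavy regex is meant to avoid splitting after abbreviations; the sample text below is a made-up example:

import re

def split_into_sentences(text):
    sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
    sentences = sentence_endings.split(text)
    return [sentence.strip() for sentence in sentences if sentence]

# "Dr." and "U.S." survive intact; real sentence endings still split.
print(split_into_sentences("Dr. Smith signed the U.S. lease. The term ends May 1! Any questions?"))
# -> ['Dr. Smith signed the U.S. lease.', 'The term ends May 1!', 'Any questions?']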
 
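One thing to note when reading the new version: messages lives at module scope and process_paragraph only ever appends to it, so conversation history accumulates across paragraphs and is shared by concurrent visitors to the Space. A possible variant, offered as an editorial sketch rather than part of this commit, scopes the history to each sentence; it reuses chat_model, torch_gc, and split_into_sentences exactly as defined in the new app.py:

import gradio as gr

def process_paragraph(paragraph, progress=gr.Progress()):
    # Sketch: same signature as the committed version, but with a fresh,
    # local message list per sentence instead of the shared module-level one.
    sentences = split_into_sentences(paragraph)
    results = []
    for i, sentence in enumerate(sentences):
        progress((i + 1) / len(sentences))
        history = [{"role": "user", "content": sentence}]
        response = ""
        for new_text in chat_model.stream_chat(history, temperature=0.7,
                                               top_p=0.9, top_k=50,
                                               max_new_tokens=300):
            response += new_text  # accumulate raw chunks; strip once at the end
        results.append((sentence, response.strip().lower().replace(' ', '_')))
    torch_gc()  # reclaim cached GPU memory once per paragraph
    return results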
 
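For orientation, since the diff cuts off at label_to_color = {: the (sentence, category) tuples that process_paragraph returns match the list-of-(text, label) format that gr.HighlightedText consumes, so the demo wiring plausibly looks like the sketch below. The dict entries and component labels here are hypothetical; only the "fair" key is confirmed by the code above, and the on_click/btn.click lines are taken from the diff:

import gradio as gr

label_to_color = {
    "fair": "green",       # hypothetical entry; only the "fair" key appears in the code above
    "unfair_term": "red",  # hypothetical entry
}

with gr.Blocks() as demo:
    input_text = gr.Textbox(lines=8, label="Paragraph to review")  # assumed component
    output = gr.HighlightedText(label="Compliance review", color_map=label_to_color)  # assumed component
    btn = gr.Button("Review")  # assumed component

    progress = gr.Progress()

    def on_click(paragraph):
        results = process_paragraph(paragraph, progress=progress)
        return results

    btn.click(on_click, inputs=input_text, outputs=[output])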