Spaces:

StevenChen16
/

LLama3-Compliance-Review

Running on Zero

App Files Files Community

StevenChen16 committed on Jun 4, 2024

Commit

f9b65c3

verified ·

1 Parent(s): 372f45f

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -43

app.py CHANGED Viewed

@@ -1,58 +1,44 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-from peft import PeftModel
-from threading import Thread
 import re
-import torch
-import spaces
-# Split a paragraph into sentences
 def split_into_sentences(text):
     """Split a block of text into a list of trimmed, non-empty sentences.

     A boundary is a single whitespace character that directly follows a
     period, question mark or exclamation mark. Two negative lookbehinds
     keep the splitter from breaking inside dotted abbreviations such as
     "e.g." and right after a capitalised two-letter title such as "Dr.".

     Args:
         text: The paragraph to split.

     Returns:
         The sentences in their original order, each stripped of
         surrounding whitespace. Whitespace-only fragments are dropped.
     """
     sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
     # Strip first, then filter: filtering on the raw fragment let
     # whitespace-only pieces (e.g. a trailing newline) through as "".
     stripped = (fragment.strip() for fragment in sentence_endings.split(text))
     return [sentence for sentence in stripped if sentence]
-# Process a paragraph
-@spaces.GPU(duration=120)
 def process_paragraph(paragraph, progress=gr.Progress()):
     """Label every sentence of ``paragraph`` and return the labelled pairs.

     Args:
         paragraph: Text that is split with ``split_into_sentences``.
         progress: Callable invoked once per sentence with
             ``(i + 1) / total_sentences``.

     Returns:
         A list of ``(sentence, category)`` tuples in sentence order.
     """
-    print("Process_Paragraph Function has been called")
     sentences = split_into_sentences(paragraph)
     results = []
     total_sentences = len(sentences)
-    print("sentences: ", sentences)
     for i, sentence in enumerate(sentences):
         progress((i + 1) / total_sentences)
-        # Sampled generation of at most 50 new tokens, stopping on any id in ``terminators``.
-        input_ids = tokenizer.encode(sentence, return_tensors='pt').to(device)
-        output = model.generate(input_ids,
-                                max_new_tokens=50,
-                                eos_token_id=terminators,
-                                temperature=0.9,
-                                do_sample=True,
-                               )
-        # NOTE(review): output[0] is decoded in full; if generate() also returns
-        # the prompt tokens, the category will contain the sentence text - confirm.
-        sentence_response = tokenizer.decode(output[0], skip_special_tokens=True).strip()
-        # Normalise the reply into a label: lower-case, spaces -> underscores.
-        category = sentence_response.lower().replace(' ', '_')
-        print("Single Sentence: ", sentence)
-        print("category: ", category)
         # Both branches store the same (sentence, category) pair.
         if category != "fair":
             results.append((sentence, category))
         else:
             results.append((sentence, "fair"))
     return results
-# Model and tokenizer
-MODEL_NAME_OR_PATH = "StevenChen16/llama3-8b-compliance-review"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME_OR_PATH)
-# Use CUDA when it is available, otherwise fall back to the CPU.
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Token ids passed to generate() as eos_token_id: the tokenizer's EOS id
-# plus the id of the "<|eot_id|>" token.
-terminators = [
-    tokenizer.eos_token_id,
-    tokenizer.convert_tokens_to_ids("<|eot_id|>")
-]
-model.to(device)
 # Map each label type to a display color
 label_to_color = {
@@ -67,14 +53,7 @@ label_to_color = {
     "arbitration": "brown",
 }
-# Inline <style> snippet: the Gradio container sizes itself automatically up
-# to a maximum height of 500px and scrolls vertically.
-css = """
-<style>
-  .gradio-container { height: auto; max-height: 500px; overflow-y: scroll; }
-</style>
-"""
 with gr.Blocks() as demo:
-    gr.Markdown(css)
     with gr.Row(equal_height=True):
         with gr.Column():
@@ -90,4 +69,4 @@ with gr.Blocks() as demo:
     btn.click(on_click, inputs=input_text, outputs=[output])
-demo.launch()

 import gradio as gr
+from llamafactory.chat import ChatModel
+from llamafactory.extras.misc import torch_gc
 import re
 def split_into_sentences(text):
     """Split a block of text into a list of trimmed, non-empty sentences.

     A boundary is a single whitespace character that directly follows a
     period, question mark or exclamation mark. Two negative lookbehinds
     keep the splitter from breaking inside dotted abbreviations such as
     "e.g." and right after a capitalised two-letter title such as "Dr.".

     Args:
         text: The paragraph to split.

     Returns:
         The sentences in their original order, each stripped of
         surrounding whitespace. Whitespace-only fragments are dropped.
     """
     sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
     # Strip first, then filter: filtering on the raw fragment let
     # whitespace-only pieces (e.g. a trailing newline) through as "".
     stripped = (fragment.strip() for fragment in sentence_endings.split(text))
     return [sentence for sentence in stripped if sentence]
 def process_paragraph(paragraph, progress=gr.Progress()):
     """Classify every sentence of a paragraph with the chat model.

     Args:
         paragraph: Free text; it is split with ``split_into_sentences``.
         progress: Gradio progress tracker, called once per sentence with
             the fraction of sentences reached so far.

     Returns:
         A list of ``(sentence, category)`` tuples in sentence order, where
         ``category`` is the model reply lower-cased with spaces replaced
         by underscores (for example ``"fair"``).
     """
     sentences = split_into_sentences(paragraph)
     total_sentences = len(sentences)
     results = []
     for index, sentence in enumerate(sentences, start=1):
         progress(index / total_sentences)
         # Each sentence gets a fresh single-turn conversation. Appending to
         # the module-level ``messages`` list made every call extend one
         # shared history, so prompts grew without bound and earlier
         # sentences influenced later labels.
         conversation = [{"role": "user", "content": sentence}]
         # Join the streamed chunks untouched: stripping each chunk removed
         # the spaces between words, so multi-word labels collapsed and the
         # space-to-underscore normalisation below never applied.
         sentence_response = "".join(
             chat_model.stream_chat(
                 conversation,
                 temperature=0.7,
                 top_p=0.9,
                 top_k=50,
                 max_new_tokens=300,
             )
         )
         category = sentence_response.strip().lower().replace(' ', '_')
         # "fair" needs no special case: the pair is stored either way.
         results.append((sentence, category))
         # Release cached accelerator memory between sentences.
         torch_gc()
     return results
+%cd /root/autodl-tmp/LLaMA-Factory/
+args = dict(
+  model_name_or_path="StevenChen16/llama3-8b-compliance-review",  # 使用量化的 Llama-3-8B-Instruct 模型
+  # adapter_name_or_path="llama3_cr_sft_5",                 # 加载保存的 LoRA 适配器
+  template="llama3",                                      # 与训练时使用的模板相同
+  finetuning_type="lora",                                 # 与训练时使用的微调类型相同
+  quantization_bit=8,                                     # 加载 4-bit 量化模型
+  use_unsloth=True,                                       # 使用 UnslothAI 的 LoRA 优化以加速生成
+)
+chat_model = ChatModel(args)
+messages = []
 # Map each label type to a display color
 label_to_color = {
     "arbitration": "brown",
 }
 with gr.Blocks() as demo:
     with gr.Row(equal_height=True):
         with gr.Column():
     btn.click(on_click, inputs=input_text, outputs=[output])
+demo.launch(share=True)