Spaces:

StevenChen16
/

LLama3-Compliance-Review

Running on Zero

App Files Files Community

StevenChen16 committed on Jun 4, 2024

Commit

5f91d0f

verified ·

1 Parent(s): ab96ee1

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -40

app.py CHANGED Viewed

@@ -5,8 +5,6 @@ subprocess.run(["git", "clone", "https://github.com/hiyouga/LLaMA-Factory.git"],
 # 切换到仓库目录
 import os
 os.chdir("LLaMA-Factory")
-# 列出目录内容
-subprocess.run(["ls"], check=True)
 # 安装unsloth
 subprocess.run(["pip", "install", "unsloth[colab-new]@git+https://github.com/unslothai/unsloth.git"], check=True)
 # 安装xformers
@@ -20,56 +18,41 @@ import gradio as gr
 from llamafactory.chat import ChatModel
 from llamafactory.extras.misc import torch_gc
 import re
-import spaces
-from threading import Thread
 def split_into_sentences(text):
     sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
     sentences = sentence_endings.split(text)
     return [sentence.strip() for sentence in sentences if sentence]
-@spaces.GPU(duration=120)
-def process_sentence(sentence, index, results, messages, progress, total_sentences):
-    messages.append({"role": "user", "content": sentence})
-    sentence_response = ""
-    for new_text in chat_model.stream_chat(messages, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=300):
-        sentence_response += new_text.strip()
-    category = sentence_response.strip().lower().replace(' ', '_')
-    if category != "fair":
-        results[index] = (sentence, category)
-    else:
-        results[index] = (sentence, "fair")
-    messages.append({"role": "assistant", "content": sentence_response})
-    torch_gc()
-    progress((index + 1) / total_sentences)
-@spaces.GPU(duration=120)
 def process_paragraph(paragraph, progress=gr.Progress()):
     sentences = split_into_sentences(paragraph)
-    results = [None] * len(sentences)
     total_sentences = len(sentences)
-    threads = []
     for i, sentence in enumerate(sentences):
-        thread = Thread(target=process_sentence, args=(sentence, i, results, messages.copy(), progress, total_sentences))
-        threads.append(thread)
-        thread.start()
-    for thread in threads:
-        thread.join()
     return results
 args = dict(
-    model_name_or_path="princeton-nlp/Llama-3-Instruct-8B-SimPO",  # 使用量化的 Llama-3-8B-Instruct 模型
-    # model_name_or_path="StevenChen16/llama3-8b-compliance-review",
-    # adapter_name_or_path="StevenChen16/llama3-8b-compliance-review-adapter",                 # 加载保存的 LoRA 适配器
-    template="llama3",                                      # 与训练时使用的模板相同
-    finetuning_type="lora",                                 # 与训练时使用的微调类型相同
-    quantization_bit=8,                                     # 加载 8-bit 量化模型
-    use_unsloth=True,                                       # 使用 UnslothAI 的 LoRA 优化以加速生成
 )
 chat_model = ChatModel(args)
 messages = []
@@ -88,6 +71,7 @@ label_to_color = {
 }
 with gr.Blocks() as demo:
     with gr.Row(equal_height=True):
         with gr.Column():
             input_text = gr.Textbox(label="Input Paragraph", lines=10, placeholder="Enter the paragraph here...")
@@ -102,4 +86,4 @@ with gr.Blocks() as demo:
     btn.click(on_click, inputs=input_text, outputs=[output])
-demo.launch(share=True)

 # Switch into the cloned LLaMA-Factory repository directory so the
 # following steps run relative to it.
 import os
 os.chdir("LLaMA-Factory")
 # Install unsloth (with the "colab-new" extra) straight from its Git repository;
 # check=True makes the script stop if pip exits with a non-zero status.
 subprocess.run(["pip", "install", "unsloth[colab-new]@git+https://github.com/unslothai/unsloth.git"], check=True)
 # Install xformers
 from llamafactory.chat import ChatModel
 from llamafactory.extras.misc import torch_gc
 import re
 def split_into_sentences(text):
     """Split *text* into a list of trimmed, non-empty sentences.

     A split happens at a single whitespace character that directly follows
     ``.``, ``?`` or ``!``, except when the preceding characters look like a
     dotted abbreviation (``e.g.``) or a capitalised two-letter title
     (``Mr.``, ``Dr.``).

     Args:
         text: The paragraph to split.

     Returns:
         The sentences in their original order, each stripped of surrounding
         whitespace. Fragments that are empty after stripping are dropped, so
         blank or whitespace-only input yields an empty list.
     """
     sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
     # Strip first, then filter: filtering before stripping would let
     # whitespace-only fragments through as empty strings.
     stripped = (piece.strip() for piece in sentence_endings.split(text))
     return [sentence for sentence in stripped if sentence]
 def process_paragraph(paragraph, progress=gr.Progress()):
     """Classify each sentence of *paragraph* with the chat model.

     Args:
         paragraph: Raw input text; it is split with ``split_into_sentences``.
         progress: Gradio progress tracker. It is called with
             ``(index + 1) / total`` just before each sentence is sent to
             the model.

     Returns:
         A list of ``(sentence, category)`` tuples in input order, where
         ``category`` is the model's reply lower-cased with spaces replaced
         by underscores (for example ``"fair"``).
     """
     sentences = split_into_sentences(paragraph)
     total_sentences = len(sentences)
     # Work on a per-call copy of the module-level ``messages`` list so one
     # request's conversation neither leaks into later requests nor makes the
     # shared history grow without bound.
     history = list(messages)
     results = []
     for i, sentence in enumerate(sentences):
         progress((i + 1) / total_sentences)
         history.append({"role": "user", "content": sentence})
         # NOTE(review): every streamed chunk is stripped before joining, so
         # whitespace between chunks is dropped -- confirm this is intended.
         sentence_response = "".join(
             new_text.strip()
             for new_text in chat_model.stream_chat(
                 history, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=300
             )
         )
         category = sentence_response.strip().lower().replace(' ', '_')
         # "fair" and every other label are recorded the same way.
         results.append((sentence, category))
         history.append({"role": "assistant", "content": sentence_response})
         # Called after every sentence; presumably releases cached GPU
         # memory -- confirm against llamafactory.extras.misc.
         torch_gc()
     return results
+%cd /root/autodl-tmp/LLaMA-Factory/
 args = dict(
+  model_name_or_path="princeton-nlp/Llama-3-Instruct-8B-SimPO",  # 使用量化的 Llama-3-8B-Instruct 模型
+  # model_name_or_path="StevenChen16/llama3-8b-compliance-review",
+  adapter_name_or_path="StevenChen16/llama3-8b-compliance-review-adapter",                 # 加载保存的 LoRA 适配器
+  template="llama3",                                      # 与训练时使用的模板相同
+  finetuning_type="lora",                                 # 与训练时使用的微调类型相同
+  quantization_bit=8,                                     # 加载 4-bit 量化模型
+  use_unsloth=True,                                       # 使用 UnslothAI 的 LoRA 优化以加速生成
 )
 chat_model = ChatModel(args)
 messages = []
 }
 with gr.Blocks() as demo:
     with gr.Row(equal_height=True):
         with gr.Column():
             input_text = gr.Textbox(label="Input Paragraph", lines=10, placeholder="Enter the paragraph here...")
     btn.click(on_click, inputs=input_text, outputs=[output])
+demo.launch(share=True)