Sadmanteemi committed
Commit 57b0e45
1 Parent(s): ebc169c

Update app.py

app.py CHANGED
@@ -33,17 +33,14 @@ As a derivate work of [Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/
 this demo is governed by the original [license](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE).
 """
 
-# if not torch.cuda.is_available():
-#     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
-
-
 if torch.cuda.is_available() or os.getenv("ZERO_GPU_SUPPORT", False):
     model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
 else:
-
-
+    model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
+    model = AutoModelForCausalLM.from_pretrained(model_id)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 @spaces.GPU
 def generate(
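The substantive change in this hunk is the else branch, whose removed lines render as blank here: the commit fills it with a CPU fallback that loads the same checkpoint without device_map="auto" or bfloat16. A minimal sketch of the resulting loading pattern; the load_model wrapper is illustrative and not a function in app.py. One caveat worth noting: os.getenv returns a string, so any non-empty ZERO_GPU_SUPPORT value, even "0" or "false", makes the condition true.

# Sketch only: load_model is an illustrative wrapper, not part of app.py.
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"

def load_model():
    # os.getenv returns a string, so any non-empty ZERO_GPU_SUPPORT value
    # (even "0" or "false") is truthy in this check.
    if torch.cuda.is_available() or os.getenv("ZERO_GPU_SUPPORT", False):
        # GPU path: let accelerate place the weights and use bfloat16
        # to roughly halve memory versus float32.
        model = AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto", torch_dtype=torch.bfloat16
        )
    else:
        # CPU fallback added by this commit: default dtype, default placement.
        model = AutoModelForCausalLM.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return model, tokenizer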
@@ -102,27 +99,6 @@ chat_interface = gr.ChatInterface(
         step=0.1,
         value=0.6,
     ),
-    # gr.Slider(
-    #     label="Top-p (nucleus sampling)",
-    #     minimum=0.05,
-    #     maximum=1.0,
-    #     step=0.05,
-    #     value=0.9,
-    # ),
-    # gr.Slider(
-    #     label="Top-k",
-    #     minimum=1,
-    #     maximum=1000,
-    #     step=1,
-    #     value=50,
-    # ),
-    # gr.Slider(
-    #     label="Repetition penalty",
-    #     minimum=1.0,
-    #     maximum=2.0,
-    #     step=0.05,
-    #     value=1.2,
-    # ),
     ],
     stop_btn=None,
     examples=[
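The deleted block was already commented out: sliders for top-p, top-k, and repetition penalty that would have joined the temperature slider in additional_inputs. If they were ever restored, each widget's value is passed positionally to the chat function after (message, history). A sketch under that assumption; the generate signature and the temperature slider's minimum/maximum are guesses, while the other slider ranges come from the removed comments.

# Illustrative wiring only; the generate signature and the temperature
# slider's min/max are assumptions, not taken from the diff.
import gradio as gr

def generate(message, history, temperature, top_p, top_k, repetition_penalty):
    # additional_inputs values arrive positionally after (message, history).
    return (f"temperature={temperature}, top_p={top_p}, "
            f"top_k={top_k}, repetition_penalty={repetition_penalty}")

chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
        gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
    ],
)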
@@ -133,9 +109,8 @@ chat_interface = gr.ChatInterface(
 
 with gr.Blocks(css="style.css", fill_height=True) as demo:
     gr.Markdown(DESCRIPTION)
-    # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
     chat_interface.render()
     gr.Markdown(LICENSE)
 
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    demo.queue(max_size=20).launch()
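The launch line is removed and re-added with identical visible text, so the change is apparently whitespace only. queue(max_size=20) routes every event through Gradio's queue and stops accepting new submissions once 20 are already waiting, which keeps a single GPU worker from being buried; together with the @spaces.GPU decorator from the first hunk, each queued call holds a GPU only while generate runs. A compressed sketch of that pattern, with a stub body standing in for the real implementation:

# Sketch: the stub generate() stands in for the real streaming chat function.
import gradio as gr
import spaces

@spaces.GPU  # on ZeroGPU Spaces, a GPU is attached only while this call runs
def generate(message, history):
    return "..."

demo = gr.ChatInterface(fn=generate)

if __name__ == "__main__":
    # At most 20 requests may wait in the queue; later submissions are rejected.
    demo.queue(max_size=20).launch()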