Update app.py
migueldeguzmandev committed
Commit c919b63 • 1 Parent(s): 6b85548
app.py
CHANGED
@@ -2,17 +2,28 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Load the model and tokenizer
-model_name = "migueldeguzmandev/RLLMv3.2-10"
+model_name = "migueldeguzmandev/migueldeguzmandev-RLLMv3.2-10"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 
+# Set the pad token ID to the EOS token ID
+model.config.pad_token_id = model.config.eos_token_id
+
 # Define the inference function
-def generate_response(input_text):
+def generate_response(input_text, temperature):
     # Tokenize the input text
-    …
+    inputs = tokenizer(input_text, return_tensors="pt")
+    input_ids = inputs["input_ids"]
+    attention_mask = inputs["attention_mask"]
 
     # Generate the model's response
-    output = model.generate(…
+    output = model.generate(
+        input_ids,
+        attention_mask=attention_mask,
+        max_length=1024,
+        num_return_sequences=1,
+        temperature=temperature,
+    )
 
     # Decode the generated response
     response = tokenizer.decode(output[0], skip_special_tokens=True)
@@ -22,11 +33,14 @@ def generate_response(input_text):
 # Create the Gradio interface
 interface = gr.Interface(
     fn=generate_response,
-    inputs=…
+    inputs=[
+        gr.Textbox(label="User Input"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.0000000000000000000000000000001, step=0.1, label="Temperature"),
+    ],
     outputs=gr.Textbox(label="Model Response"),
-    title="Conversation with …
-    description="Enter your message and the model will generate a response.",
+    title="Conversation with migueldeguzmandev-RLLMv3.2-10",
+    description="Enter your message and adjust the temperature, then the model will generate a response.",
 )
 
-# Launch the interface
-interface.launch()
+# Launch the interface with the share option set to True
+interface.launch(share=True)
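A note on the new temperature parameter: in the transformers library, temperature only takes effect when sampling is enabled, and model.generate defaults to greedy decoding, so the call above ignores the slider value (recent transformers versions log a warning to that effect). A minimal sketch of the same call with sampling switched on; everything here is as defined in app.py above except do_sample=True, which is the one added assumption:

# Same generate() call as in the commit, with sampling enabled so the
# temperature slider actually changes the output distribution.
output = model.generate(
    input_ids,
    attention_mask=attention_mask,
    max_length=1024,
    num_return_sequences=1,
    do_sample=True,           # assumption: without this, temperature is ignored
    temperature=temperature,
)

Two smaller points about the committed code: the slider's default value (roughly 1e-31) sits below its minimum of 0.1, so the rendered control clamps to the 0.1–1.0 range; and when the app runs on Hugging Face Spaces, Gradio ignores share=True, since a Space is already served publicly.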