Richard Neuschulz committed
Commit 5683d80 (parent: 411ebc8): "into function"

app.py CHANGED
@@ -2,21 +2,21 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import spaces  # Import the spaces module for ZeroGPU compatibility
 
-# Load the model and tokenizer from Hugging Face
-model_id = "seedboxai/KafkaLM-8x7B-German-V0.1-DPO"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, load_in_8bit=False, trust_remote_code=True).to('cuda')  # Move the model to GPU
-
 # Define the text generation function
 @spaces.GPU  # Decorate this function to use GPU
 def generate_text(user_input, system_prompt):
+    # Load the model and tokenizer from Hugging Face within the function
+    model_id = "seedboxai/KafkaLM-8x7B-German-V0.1-DPO"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, load_in_8bit=False, trust_remote_code=True).to('cuda')  # Move the model to GPU here
+
     # Combine the system prompt and the user input to form the full prompt
     full_prompt = f"{system_prompt.strip()}\n\n{user_input.strip()}"
 
-    # Initialize the pipeline for text generation
+    # Initialize the pipeline for text generation with the model and tokenizer
     text_generator = pipeline('text-generation', model=model, tokenizer=tokenizer,
                               return_full_text=True, temperature=0.5,
-                              max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True, device=0) #
+                              max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True, device=0)  # Ensure device is set to use GPU
 
     # Generate text based on the full prompt
     results = text_generator(full_prompt)
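Note on this change: on ZeroGPU Spaces a GPU is only attached while a function decorated with @spaces.GPU is running, so the old module-level .to('cuda') executed before any GPU was available; moving the loading into generate_text is the fix this commit applies. Below is a minimal sketch of the resulting app using the current transformers quantization API. Everything beyond what the diff shows is an assumption: BitsAndBytesConfig(load_in_4bit=True) stands in for the deprecated load_in_4bit= argument, the .to('cuda') call is dropped because transformers rejects .to() on 4-bit models, device=0 is omitted since the quantized model already carries a device map, and the return line plus the Gradio wiring are guesses (the diff truncates after results = text_generator(full_prompt)).

import gradio as gr
import spaces  # ZeroGPU: provides the @spaces.GPU decorator
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig, pipeline)

model_id = "seedboxai/KafkaLM-8x7B-German-V0.1-DPO"

@spaces.GPU  # a GPU is attached only while this function runs
def generate_text(user_input, system_prompt):
    # Load inside the decorated function, as in the commit above
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map="auto",  # places the 4-bit weights on the GPU; no .to('cuda')
        trust_remote_code=True,
    )

    # Combine the system prompt and the user input to form the full prompt
    full_prompt = f"{system_prompt.strip()}\n\n{user_input.strip()}"

    text_generator = pipeline(
        "text-generation", model=model, tokenizer=tokenizer,
        return_full_text=True, temperature=0.5,
        max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True,
        # device=0 omitted: the quantized model already carries a device map
    )
    results = text_generator(full_prompt)
    return results[0]["generated_text"]  # assumed; the diff ends before this line

# Hypothetical wiring (the diff does not show the Gradio interface):
demo = gr.Interface(fn=generate_text,
                    inputs=[gr.Textbox(label="User input"),
                            gr.Textbox(label="System prompt")],
                    outputs="text")
demo.launch()

One trade-off of in-function loading is that the 8x7B checkpoint is reloaded on every request; caching the model and tokenizer in module-level globals after the first call is a common mitigation.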