Richard Neuschulz committed
Commit 5683d80
1 Parent(s): 411ebc8

into function

Files changed (1)
app.py +7 -7
app.py CHANGED
@@ -2,21 +2,21 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import spaces  # Import the spaces module for ZeroGPU compatibility
 
-# Load the model and tokenizer from Hugging Face
-model_id = "seedboxai/KafkaLM-8x7B-German-V0.1-DPO"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, load_in_8bit=False, trust_remote_code=True).to('cuda')  # Move the model to GPU
-
 # Define the text generation function
 @spaces.GPU  # Decorate this function to use GPU
 def generate_text(user_input, system_prompt):
+    # Load the model and tokenizer from Hugging Face within the function
+    model_id = "seedboxai/KafkaLM-8x7B-German-V0.1-DPO"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, load_in_8bit=False, trust_remote_code=True).to('cuda')  # Move the model to GPU here
+
     # Combine the system prompt and the user input to form the full prompt
     full_prompt = f"{system_prompt.strip()}\n\n{user_input.strip()}"
 
-    # Initialize the pipeline for text generation
+    # Initialize the pipeline for text generation with the model and tokenizer
     text_generator = pipeline('text-generation', model=model, tokenizer=tokenizer,
                               return_full_text=True, temperature=0.5,
-                              max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True, device=0)  # Specify the device for the pipeline
+                              max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True, device=0)  # Ensure device is set to use GPU
 
     # Generate text based on the full prompt
     results = text_generator(full_prompt)
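For context, here is a minimal sketch of what the full app.py might look like after this commit. Only the imports and the body of generate_text come from the diff; the return statement and the gr.Interface wiring at the bottom fall outside the hunk and are assumptions, not part of the commit. One caveat: on recent transformers versions, calling .to('cuda') on a model loaded with load_in_4bit=True raises a ValueError (bitsandbytes places the quantized model on the GPU itself), so the sketch omits that call and the pipeline's device argument.

import gradio as gr
import spaces  # ZeroGPU grants a GPU only while a @spaces.GPU function runs
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

@spaces.GPU  # a GPU is attached for the duration of this call
def generate_text(user_input, system_prompt):
    # Loading inside the function ensures the CUDA work happens only once
    # ZeroGPU has allocated a device for this request.
    model_id = "seedboxai/KafkaLM-8x7B-German-V0.1-DPO"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        load_in_4bit=True,       # 4-bit quantization via bitsandbytes
        trust_remote_code=True,
    )  # no .to('cuda'): 4-bit models are already placed on the GPU

    # Combine the system prompt and the user input to form the full prompt
    full_prompt = f"{system_prompt.strip()}\n\n{user_input.strip()}"

    text_generator = pipeline(
        "text-generation", model=model, tokenizer=tokenizer,
        return_full_text=True, temperature=0.5,
        max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True,
    )
    results = text_generator(full_prompt)
    return results[0]["generated_text"]  # assumed return value, not shown in the hunk

# Assumed interface wiring -- the diff does not show this part of the file
demo = gr.Interface(
    fn=generate_text,
    inputs=[gr.Textbox(label="User input"), gr.Textbox(label="System prompt")],
    outputs=gr.Textbox(label="Generated text"),
)

if __name__ == "__main__":
    demo.launch()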