Richard Neuschulz committed
Commit 5683d80 (parent: 411ebc8): "into function"

app.py CHANGED
@@ -2,21 +2,21 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import spaces  # Import the spaces module for ZeroGPU compatibility
 
-# Load the model and tokenizer from Hugging Face
-model_id = "seedboxai/KafkaLM-8x7B-German-V0.1-DPO"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, load_in_8bit=False, trust_remote_code=True).to('cuda')  # Move the model to GPU
-
 # Define the text generation function
 @spaces.GPU  # Decorate this function to use GPU
 def generate_text(user_input, system_prompt):
+    # Load the model and tokenizer from Hugging Face within the function
+    model_id = "seedboxai/KafkaLM-8x7B-German-V0.1-DPO"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, load_in_8bit=False, trust_remote_code=True).to('cuda')  # Move the model to GPU here
+
     # Combine the system prompt and the user input to form the full prompt
     full_prompt = f"{system_prompt.strip()}\n\n{user_input.strip()}"
 
-    # Initialize the pipeline for text generation
+    # Initialize the pipeline for text generation with the model and tokenizer
     text_generator = pipeline('text-generation', model=model, tokenizer=tokenizer,
                               return_full_text=True, temperature=0.5,
-                              max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True, device=0) #
+                              max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True, device=0)  # Ensure device is set to use GPU
 
     # Generate text based on the full prompt
     results = text_generator(full_prompt)
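Note on this change: on ZeroGPU Spaces a GPU is only attached while a function decorated with @spaces.GPU is running, so the old module-level .to('cuda') executed before any GPU was available; moving the loading into generate_text is the fix this commit applies. Below is a minimal sketch of the resulting app using the current transformers quantization API. Everything beyond what the diff shows is an assumption: BitsAndBytesConfig(load_in_4bit=True) stands in for the deprecated load_in_4bit= argument, the .to('cuda') call is dropped because transformers rejects .to() on 4-bit models, device=0 is omitted since the quantized model already carries a device map, and the return line plus the Gradio wiring are guesses (the diff truncates after results = text_generator(full_prompt)).

import gradio as gr
import spaces  # ZeroGPU: provides the @spaces.GPU decorator
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig, pipeline)

model_id = "seedboxai/KafkaLM-8x7B-German-V0.1-DPO"

@spaces.GPU  # a GPU is attached only while this function runs
def generate_text(user_input, system_prompt):
    # Load inside the decorated function, as in the commit above
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map="auto",  # places the 4-bit weights on the GPU; no .to('cuda')
        trust_remote_code=True,
    )

    # Combine the system prompt and the user input to form the full prompt
    full_prompt = f"{system_prompt.strip()}\n\n{user_input.strip()}"

    text_generator = pipeline(
        "text-generation", model=model, tokenizer=tokenizer,
        return_full_text=True, temperature=0.5,
        max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True,
        # device=0 omitted: the quantized model already carries a device map
    )
    results = text_generator(full_prompt)
    return results[0]["generated_text"]  # assumed; the diff ends before this line

# Hypothetical wiring (the diff does not show the Gradio interface):
demo = gr.Interface(fn=generate_text,
                    inputs=[gr.Textbox(label="User input"),
                            gr.Textbox(label="System prompt")],
                    outputs="text")
demo.launch()

One trade-off of in-function loading is that the 8x7B checkpoint is reloaded on every request; caching the model and tokenizer in module-level globals after the first call is a common mitigation.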