NoaiGPT committed
Commit f042b86
1 Parent(s): 06ff2b5
Files changed (1)
  1. app.py +8 -9
app.py CHANGED
@@ -1,7 +1,7 @@
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import gradio as gr
-import spaces
+
 # Load the model and tokenizer
 model_name = "NoaiGPT/merged-llama3-8b-instruct-1720894657"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -11,15 +11,14 @@ model = AutoModelForCausalLM.from_pretrained(model_name)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
+# Create a text generation pipeline
+text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
+
 # Define the prediction function
-@spaces.GPU
 def generate_text(prompt):
-    # Tokenize the input and move to GPU if available
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    # Generate text using the model
-    outputs = model.generate(inputs.input_ids, max_length=200, num_return_sequences=1)
-    # Decode the generated text
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Generate text using the pipeline
+    outputs = text_generator(prompt, max_length=200, num_return_sequences=1)
+    generated_text = outputs[0]["generated_text"]
     return generated_text
 
 # Define the Gradio interface
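
For reference, a sketch of app.py as it would look after this commit. The hunks stop at the "# Define the Gradio interface" comment, so the gr.Interface wiring at the bottom is an assumption for illustration, not part of the diff; the model-loading line between the hunks is taken from the second hunk's context header.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr

# Load the model and tokenizer
model_name = "NoaiGPT/merged-llama3-8b-instruct-1720894657"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create a text generation pipeline; device=0 selects the first GPU,
# device=-1 keeps the pipeline on CPU
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

# Define the prediction function
def generate_text(prompt):
    # max_length counts prompt tokens plus generated tokens
    outputs = text_generator(prompt, max_length=200, num_return_sequences=1)
    return outputs[0]["generated_text"]

# Define the Gradio interface (assumed wiring, not shown in the hunks)
demo = gr.Interface(fn=generate_text, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()

One behavioral note on the new path: a "text-generation" pipeline returns the prompt concatenated with the continuation in generated_text, and max_length includes the prompt's tokens, so longer prompts leave correspondingly less room for new text (max_new_tokens is the usual alternative when a fixed amount of generated text is wanted).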