ajeetkumar01 committed
Commit 2fa0eaf · verified · 1 Parent(s): 0e7d5d7

Update app.py

Files changed (1): app.py +70 -27
app.py CHANGED
@@ -1,3 +1,68 @@
+# import torch
+# from transformers import AutoTokenizer, AutoModelForCausalLM
+# import gradio as gr
+
+# # Load pre-trained GPT-2 model and tokenizer
+# model_name = "gpt2-large"
+# tokenizer = AutoTokenizer.from_pretrained(model_name)
+# model = AutoModelForCausalLM.from_pretrained(model_name)
+
+
+# def generate_text(input_text, max_length=32, num_beams=5, do_sample=False, no_repeat_ngram_size=2):
+#     """
+#     Generate text based on the given input text.
+#     Parameters:
+#     - input_text (str): The input text to start generation from.
+#     - max_length (int): Maximum length of the generated text.
+#     - num_beams (int): Number of beams for beam search.
+#     - do_sample (bool): Whether to use sampling or not.
+#     - no_repeat_ngram_size (int): Size of the n-gram to avoid repetition.
+#     Returns:
+#     - generated_text (str): The generated text.
+#     """
+#     # Encode the input text and move it to the appropriate device
+#     input_ids = tokenizer(input_text, return_tensors='pt')['input_ids']
+#     # Generate text using the model
+#     output = model.generate(input_ids, max_length=max_length, num_beams=num_beams,
+#                             do_sample=do_sample, no_repeat_ngram_size=no_repeat_ngram_size)
+#     # Decode the generated output
+#     generated_text = tokenizer.decode(output[0])
+#     return generated_text
+
+
+# # def generate_text_with_nucleus_search(input_text, max_length=16, do_sample=True, top_p=0.9):
+# #     """
+# #     Generate text with nucleus sampling based on the given input text.
+# #     Parameters:
+# #     - input_text (str): The input text to start generation from.
+# #     - max_length (int): Maximum length of the generated text.
+# #     - do_sample (bool): Whether to use sampling or not.
+# #     - top_p (float): Nucleus sampling parameter.
+# #     Returns:
+# #     - generated_text (str): The generated text.
+# #     """
+# #     # Encode the input text and move it to the appropriate device
+# #     input_ids = tokenizer(input_text, return_tensors='pt')['input_ids']
+# #     # Generate text using nucleus sampling
+# #     output = model.generate(input_ids, max_length=max_length, do_sample=do_sample, top_p=top_p)
+# #     # Decode the generated output
+# #     generated_text = tokenizer.decode(output[0])
+# #     return generated_text
+
+
+# # Create Gradio interface
+# input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter text for text generation...")
+# output_text = gr.Textbox(label="Generated Text")
+
+
+
+# gr.Interface(generate_text, input_text, output_text,
+#              title="Text Generation with GPT-2",
+#              description="Generate text using the GPT-2 model.",
+#              theme="default",
+#              allow_flagging="never").launch(share=True)
+
+
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
@@ -5,8 +70,7 @@ import gradio as gr
 # Load pre-trained GPT-2 model and tokenizer
 model_name = "gpt2-large"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
-
+model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
 
 def generate_text(input_text, max_length=32, num_beams=5, do_sample=False, no_repeat_ngram_size=2):
     """
@@ -21,43 +85,22 @@ def generate_text(input_text, max_length=32, num_beams=5, do_sample=False, no_repeat_ngram_size=2):
     - generated_text (str): The generated text.
     """
     # Encode the input text and move it to the appropriate device
-    input_ids = tokenizer(input_text, return_tensors='pt')['input_ids']
+    input_ids = tokenizer(input_text, return_tensors='pt', padding=True)['input_ids']
     # Generate text using the model
     output = model.generate(input_ids, max_length=max_length, num_beams=num_beams,
                             do_sample=do_sample, no_repeat_ngram_size=no_repeat_ngram_size)
     # Decode the generated output
-    generated_text = tokenizer.decode(output[0])
+    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
     return generated_text
 
 
-# def generate_text_with_nucleus_search(input_text, max_length=16, do_sample=True, top_p=0.9):
-#     """
-#     Generate text with nucleus sampling based on the given input text.
-#     Parameters:
-#     - input_text (str): The input text to start generation from.
-#     - max_length (int): Maximum length of the generated text.
-#     - do_sample (bool): Whether to use sampling or not.
-#     - top_p (float): Nucleus sampling parameter.
-#     Returns:
-#     - generated_text (str): The generated text.
-#     """
-#     # Encode the input text and move it to the appropriate device
-#     input_ids = tokenizer(input_text, return_tensors='pt')['input_ids']
-#     # Generate text using nucleus sampling
-#     output = model.generate(input_ids, max_length=max_length, do_sample=do_sample, top_p=top_p)
-#     # Decode the generated output
-#     generated_text = tokenizer.decode(output[0])
-#     return generated_text
-
-
 # Create Gradio interface
 input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter text for text generation...")
 output_text = gr.Textbox(label="Generated Text")
 
-
-
 gr.Interface(generate_text, input_text, output_text,
              title="Text Generation with GPT-2",
              description="Generate text using the GPT-2 model.",
             theme="default",
-             allow_flagging="never").launch(share=True)
+             allow_flagging="never").launch()
+
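A note on the two padding-related edits in this commit. Passing pad_token_id=tokenizer.eos_token_id to from_pretrained gives generate() a pad id, since GPT-2 defines no pad token, and it also silences the "Setting pad_token_id to eos_token_id" notice during generation. Calling the tokenizer with padding=True, however, still requires the tokenizer itself to carry a pad token; without one, transformers raises a ValueError ("Asking to pad but the tokenizer does not have a padding token"). A minimal sketch of how the two fit together (the tokenizer.pad_token assignment is an assumed extra line, not part of this commit):

from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "gpt2-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# GPT-2 ships without a pad token; reuse <|endoftext|> for padding.
# (Assumed addition: this line is not in the commit above.)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)

# Same call pattern as the app's generate_text(): padding=True now works,
# and skip_special_tokens=True keeps <|endoftext|> out of the decoded text.
input_ids = tokenizer("Once upon a time", return_tensors="pt", padding=True)["input_ids"]
output = model.generate(input_ids, max_length=32, num_beams=5,
                        do_sample=False, no_repeat_ngram_size=2)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Dropping share=True from launch() likewise fits an app hosted on a Hugging Face Space, which is already served at a public URL, so no Gradio share tunnel is needed.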