File size: 2,531 Bytes
e8cb9c9
3c4c2ab
b34ecb0
 
 
dcdaaec
f306d19
557899a
8e5beee
d46dad1
b34ecb0
b30b5d8
c4556a0
ebb0ce9
39f1739
f306d19
6f18088
 
 
a0f8996
6f18088
 
 
 
 
 
 
a0f8996
 
 
 
 
b34ecb0
 
 
 
 
 
 
 
 
 
 
1b0511d
b34ecb0
1b0511d
b34ecb0
 
1b0511d
cedcd4a
b34ecb0
 
 
 
 
 
 
1b0511d
b34ecb0
b30b5d8
9d39817
0f3065b
5826562
 
9f584e5
b34ecb0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import gradio as gr
import numpy as np

from huggingface_hub import from_pretrained_keras

# NOTE(review): removed three defective imports from the original:
#   - `from keras.utils.data_utils import pad_sequences` shadowed the
#     tf.keras import above with a deprecated module path that no longer
#     exists in modern Keras;
#   - `from gradio import mix` and `import torch` were never used anywhere
#     in this file.

# Pre-trained miniature-GPT text-generation model pulled from the
# Hugging Face Hub (downloads on first run; requires network access).
model = from_pretrained_keras("keras-io/text-generation-miniature-gpt")

a = []               # vocabulary tokens, one per line of imdb.vocab
word_to_index = {}   # token -> its row index in imdb.vocab

# Load the IMDB vocabulary tokens. `with` guarantees the file handle is
# closed (the original opened it and never closed it).
with open("imdb.vocab") as a_file:  # get vocab tokens
  for line in a_file:
    a.append(line.strip())
print(len(a))

# BUG FIX: the original stored index -> word in a dict whose name promises
# word -> index; build the mapping the name describes. (The dict is not
# read anywhere else in this file, so the fix is safe for callers.)
for index, word in enumerate(a):
    word_to_index[word] = index

# The tokenizer is fit on the vocabulary tokens themselves. Fitting on `a`
# is identical to the original `fit_on_texts(word_to_index.values())`,
# which iterated the same words in the same order.
tokenizer = Tokenizer(num_words=80, split=' ')
tokenizer.fit_on_texts(a)
  
def text_process_pipeline(start_prompt):
  """Tokenize a list of prompt strings and zero-pad each sequence.

  Uses the module-level `tokenizer`; every sequence is padded at the end
  ('post') to a fixed length of 80.
  """
  sequences = tokenizer.texts_to_sequences(start_prompt)
  padded = pad_sequences(sequences, maxlen=80, padding='post')
  return padded
  
def sample_from(logits):
  """Sample one token id from the top-10 entries of `logits`.

  Args:
    logits: 1-D tensor/array of per-token logits for a single position.

  Returns:
    A token id (int) drawn according to the softmax over the top-10 logits.
  """
  top_logits, top_indices = tf.math.top_k(logits, k=10, sorted=True)
  # BUG FIX: the original converted the index tensor to a numpy int32 array
  # but then passed the raw tensor `i` to np.random.choice; use the
  # converted array as clearly intended.
  indices = np.asarray(top_indices).astype("int32")
  # Softmax over just the top-k logits gives the sampling distribution.
  preds = keras.activations.softmax(tf.expand_dims(top_logits, 0))[0]
  preds = np.asarray(preds).astype("float32")
  return np.random.choice(indices, p=preds)

def generate_answers(text):
  """Autoregressively generate up to 40 tokens continuing `text`.

  Args:
    text: the prompt string typed by the user.

  Returns:
    The generated continuation, decoded back to a string.

  BUG FIX: the original loop called the model once and then sampled the
  SAME logits vector 41 times (nothing inside the loop changed the model
  input), always at position 0 (`len([text]) - 1`). This rewrite follows
  the standard miniature-GPT generation loop: after each sampled token,
  the token is appended to the input and the model is run again, sampling
  at the position of the last real token.
  """
  max_tokens_to_generate = 40
  maxlen = 80  # must match the padding length used by text_process_pipeline

  # Strip the zero padding so we know where the real prompt ends.
  # NOTE(review): assumes token id 0 is only ever padding — confirm against
  # the tokenizer configuration.
  tokens = [int(t) for t in text_process_pipeline([text])[0] if t != 0]
  tokens_generated = []

  while len(tokens_generated) < max_tokens_to_generate:
    padded = pad_sequences([tokens[-maxlen:]], maxlen=maxlen, padding='post')
    predictions, _ = model.predict(padded, verbose=0)
    # Sample the distribution predicted after the last real token.
    sample_index = min(len(tokens), maxlen) - 1
    sample_token = int(sample_from(predictions[0][sample_index]))
    tokens_generated.append(sample_token)
    tokens.append(sample_token)

  text_out = tokenizer.sequences_to_texts([tokens_generated])
  return text_out[0]
    
# Demo examples, title, and description shown in the Gradio UI.
examples = [["I was fortunate to attend the London premier of this film. While I am not at all a fan of British drama, I did find myself deeply moved by the characters and the BAD CHOICES they made. I was in tears by the end of the film. Every scene was mesmerizing. The attention to detail and the excellent acting was quite impressive."],["The movie was nice, "], ["It was showing nothing special to "]]
title = "Text Generation with Miniature GPT"
description = "Gradio Demo for a miniature with GPT. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."

# Build the interface and start serving it (debug mode surfaces errors
# in the console).
demo_interface = gr.Interface(
    fn=generate_answers,
    inputs=['text'],
    outputs=["text"],
    title=title,
    description=description,
    examples=examples,
)
demo_interface.launch(debug=True)