File size: 6,639 Bytes
86f2d3a
 
 
 
 
 
 
 
62a9f8b
 
86f2d3a
 
 
 
 
 
62a9f8b
86f2d3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62a9f8b
 
86f2d3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162

import sys
import time

import printj
from transformers import pipeline  # , set_seed
import numpy as np
import pandas as pd
# import nltk
import re

class StoryGenerator:
    """Extend a text prompt with GPT-2 and classify the emotion of new text.

    Two Hugging Face pipelines are created on construction: a
    ``text-generation`` pipeline (``gpt2``) and a ``text-classification``
    pipeline (``j-hartmann/emotion-english-distilroberta-base``).
    """

    def __init__(self):
        self.initialise_models()
        # Per-round statistics accumulated across get_stats() calls.
        self.stats_df = pd.DataFrame(data=[], columns=[])
        # Final story text of every run of the most recent get_stats() call.
        self.stories = []

    def initialise_models(self):
        """Create the generator and classifier pipelines and print load time."""
        start = time.time()
        self.generator = pipeline('text-generation', model='gpt2')
        # The rest of the class indexes the classifier result as
        # ``result[0]`` -> list of {'label', 'score'} dicts, which is the
        # shape produced with return_all_scores=True.
        self.classifier = pipeline("text-classification",
                                   model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
        initialising_time = time.time()-start
        print(f'Initialising Time: {initialising_time}')

    def reset(self):
        """Discard both the stored stories and the accumulated statistics."""
        self.clear_stories()
        self.clear_stats()

    def clear_stories(self):
        """Empty the list of stored stories."""
        self.stories = []

    def clear_stats(self):
        """Replace the accumulated statistics with an empty DataFrame."""
        self.stats_df = pd.DataFrame(data=[], columns=[])

    @staticmethod
    def get_num_token(text):
        r"""Return the number of ``\w+`` runs (word-like tokens) in *text*.

        This is a regex word count, not the generator's own tokenisation.
        """
        return len(re.findall(r'\w+', text))

    @staticmethod
    def check_show_emotion(confidence_score, frequency, w):
        """Randomly decide whether an emotion should be shown.

        The show-probability is ``w * confidence_score +
        (1 - w) * (1 - frequency)``; the result is True when that value
        exceeds a fresh uniform sample from ``[0, 1)``.

        Args:
            confidence_score: Classifier score of the detected emotion.
            frequency: Fraction of earlier rounds in which an emotion was
                shown; a higher value lowers the probability.
            w: Weight of ``confidence_score`` against the frequency penalty.

        Returns:
            Truthy if the emotion should be shown, falsy otherwise.
        """
        frequency_penalty = 1 - frequency
        probability_emote = w * confidence_score + (1-w) * frequency_penalty
        return probability_emote > np.random.random_sample()

    def story(self,
              story_till_now="Hello, I'm a language model,",
              num_generation=4,
              length=10):
        """Extend the story for several rounds and classify the last addition.

        Args:
            story_till_now: Prompt to continue.
            num_generation: Number of generation rounds.
            length: Extra ``max_length`` budget granted on top of the
                current word count in every round.

        Returns:
            ``(story, emotion)`` where ``emotion`` is the highest-scoring
            ``{'label', 'score'}`` dict for the text added in the final
            round, or ``None`` when ``num_generation`` is 0.
        """
        # Stays None when the loop body never runs.
        emotion = None
        for _ in range(num_generation):
            last_length = len(story_till_now)
            # Every round, including the first, gets the same extra budget.
            generated = self.generator(
                story_till_now,
                max_length=self.get_num_token(story_till_now) + length,
                num_return_sequences=1)
            story_till_now = generated[0]['generated_text']
            # Only the newly generated suffix is classified.
            new_sentence = story_till_now[last_length:]
            emotions = self.classifier(new_sentence)
            emotion = max(emotions[0], key=lambda x: x['score'])
        return story_till_now, emotion

    def auto_ist(self,
                 story_till_now="Hello, I'm a language model,",
                 num_generation=4,
                 length=20, reaction_weight=0.5):
        """Simulate alternating user/robot turns and record reaction stats.

        Each round generates a "user" continuation, classifies its emotion,
        decides whether the emotion is shown, and then generates a "robot"
        continuation.

        Args:
            story_till_now: Prompt to continue.
            num_generation: Number of user+robot rounds.
            length: Extra ``max_length`` budget per generation call.
            reaction_weight: Passed as ``w`` to ``check_show_emotion``.

        Returns:
            ``(stats_df, story)``: one DataFrame row per round (index
            ``idx_<round>``) and the final story text.
        """
        rows = []
        num_reactions = 0
        reaction_frequency = 0
        for i in range(num_generation):
            # Text generation for User
            last_length = len(story_till_now)
            printj.cyan(story_till_now)
            printj.red.bold_on_white(
                f'loop: {i}; generate user text; length: {last_length}')
            genreate_user_sentence = self.generator(story_till_now, max_length=self.get_num_token(
                story_till_now)+length, num_return_sequences=1)
            story_till_now = genreate_user_sentence[0]['generated_text']
            new_sentence = story_till_now[last_length:]

            printj.red.bold_on_white(f'loop: {i}; check emotion')
            # Emotion classification for User
            emotions = self.classifier(new_sentence)
            emotion = max(emotions[0], key=lambda x: x['score'])
            if emotion['label'] == 'neutral':
                # Neutral text is never shown; reaction_frequency keeps the
                # value from the last non-neutral round.
                show_emotion = False
            else:
                reaction_frequency = num_reactions/(i+1)
                show_emotion = self.check_show_emotion(
                    confidence_score=emotion['score'], frequency=reaction_frequency, w=reaction_weight)
            if show_emotion:
                num_reactions += 1

            # Text generation for Robot
            last_length = len(story_till_now)
            printj.cyan(story_till_now)
            printj.red.bold_on_white(
                f'loop: {i}; generate robot text; length: {last_length}')
            genreate_robot_sentence = self.generator(story_till_now, max_length=self.get_num_token(
                story_till_now)+length, num_return_sequences=1)
            story_till_now = genreate_robot_sentence[0]['generated_text']

            # The recorded emotion is the one detected on the user turn.
            rows.append({
                'sentence_no': i,
                'show_emotion': show_emotion,
                'emotion_label': emotion['label'],
                'emotion_score': emotion['score'],
                'num_reactions': num_reactions,
                'reaction_frequency': reaction_frequency,
                'reaction_weight': reaction_weight,
            })
        # Build the frame once instead of concatenating inside the loop.
        stats_df = pd.DataFrame(
            rows, index=[f'idx_{i}' for i in range(len(rows))])
        return stats_df, story_till_now

    def get_stats(self,
                  story_till_now="Hello, I'm a language model,",
                  num_generation=4,
                  length=20, reaction_weight=-1, num_tests=2):
        """Run ``auto_ist`` several times and accumulate the statistics.

        Results are appended to ``self.stats_df`` (with a ``test_id`` column
        that continues after any existing ids) and the final story of each
        run is stored in ``self.stories``, which is cleared first.

        Args:
            story_till_now: Prompt used for every run.
            num_generation: Rounds per run, forwarded to ``auto_ist``.
            length: Per-call length budget, forwarded to ``auto_ist``.
            reaction_weight: Weight forwarded to ``auto_ist``; ``-1`` draws
                a new random weight (rounded to one decimal) for each run.
            num_tests: Number of runs.
        """
        use_random_w = reaction_weight == -1
        self.stories = []
        try:
            num_rows = max(self.stats_df.test_id)+1
        except (AttributeError, ValueError, TypeError):
            # No 'test_id' column yet, or it is empty/unusable: start at 0.
            num_rows = 0
        for test_id in range(num_tests):
            if use_random_w:
                reaction_weight = np.round(np.random.random_sample(), 1)
            stats_df0, _story_till_now = self.auto_ist(
                story_till_now=story_till_now,
                num_generation=num_generation,
                length=length, reaction_weight=reaction_weight)
            stats_df0.insert(loc=0, column='test_id', value=test_id+num_rows)

            # Appended per run so partial results survive a later failure.
            self.stats_df = pd.concat([self.stats_df, stats_df0])
            printj.yellow(f'test_id: {test_id}')
            printj.green(stats_df0)
            self.stories.append(_story_till_now)
        self.stats_df = self.stats_df.reset_index(drop=True)
        print(self.stats_df)

    def save_stats(self, path='pandas_simple.xlsx'):
        """Write ``self.stats_df`` to sheet ``IST`` of an Excel file at *path*."""
        # The context manager saves and closes the workbook on exit, even
        # if to_excel() raises.
        with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
            self.stats_df.to_excel(writer, sheet_name='IST')