jgyasu committed on
Commit
38c3a0a
·
verified ·
1 Parent(s): 0840f0a

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +0 -1
  2. highlighter.py +66 -14
app.py CHANGED
@@ -56,7 +56,6 @@ class WatermarkingPipeline:
56
  all_tokenized_sentences = []
57
  all_tokenized_sentences.append(self.user_prompt_tokenized)
58
  all_tokenized_sentences.extend(self.selected_sentences_tokenized)
59
- all_tokenized_sentences.extend(self.discarded_sentences_tokenized)
60
 
61
  self.common_grams = find_non_melting_points(all_tokenized_sentences)
62
 
 
56
  all_tokenized_sentences = []
57
  all_tokenized_sentences.append(self.user_prompt_tokenized)
58
  all_tokenized_sentences.extend(self.selected_sentences_tokenized)
 
59
 
60
  self.common_grams = find_non_melting_points(all_tokenized_sentences)
61
 
highlighter.py CHANGED
@@ -42,6 +42,53 @@ def highlight_common_words(common_words, sentences, title):
42
 
43
  import re
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def highlight_common_words_dict(common_words, sentences, title):
46
  color_map = {}
47
  color_index = 0
@@ -61,31 +108,36 @@ def highlight_common_words_dict(common_words, sentences, title):
61
  pattern,
62
  lambda m, idx=index, color=color_map[word]: (
63
  f'<span style="background-color: {color}; font-weight: bold;'
64
- f' padding: 1px 2px; border-radius: 2px; position: relative;">'
65
  f'<span style="background-color: black; color: white; border-radius: 50%;'
66
- f' padding: 1px 3px; margin-right: 3px; font-size: 0.8em;">{idx}</span>'
67
  f'{m.group(0)}'
68
  f'</span>'
69
  ),
70
  highlighted_sentence,
71
  flags=re.IGNORECASE
72
  )
 
 
73
  highlighted_html.append(
74
  f'<div style="margin-bottom: 5px;">'
75
  f'{highlighted_sentence}'
76
- f'<div style="display: inline-block; margin-left: 5px; padding: 3px 5px; border-radius: 3px; background-color: white; font-size: 0.9em;">'
77
  f'Entailment Score: {score}</div></div>'
78
  )
79
 
80
- final_html = "<br>".join(highlighted_html)
81
  return f'''
82
- <div style="background-color: #ffffff; color: #374151;">
83
  <h3 style="margin-top: 0; font-size: 1em; color: #111827;">{title}</h3>
84
  <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
85
  </div>
86
  '''
87
 
88
 
 
 
 
89
  def reparaphrased_sentences_html(sentences):
90
 
91
  formatted_sentences = []
@@ -105,14 +157,14 @@ def reparaphrased_sentences_html(sentences):
105
  '''
106
 
107
 
108
- common_words = [(1, "highlight"), (2, "numbering")]
109
- sentences = ["This is a test to highlight words.", "Numbering is important for clarity."]
110
 
111
- # Test highlight_common_words
112
- highlighted_html = highlight_common_words(common_words, sentences, "Test Highlighting")
113
- print(highlighted_html)
114
 
115
- # Test highlight_common_words_dict
116
- sentences_with_scores = {"Highlight words in this text.": 0.95, "Number sentences for clarity.": 0.8}
117
- highlighted_html_dict = highlight_common_words_dict(common_words, sentences_with_scores, "Test Dict Highlighting")
118
- print(highlighted_html_dict)
 
42
 
43
  import re
44
 
45
+ # def highlight_common_words_dict(common_words, sentences, title):
46
+ # color_map = {}
47
+ # color_index = 0
48
+ # highlighted_html = []
49
+
50
+ # # Ensure indices in common_words are integers
51
+ # sanitized_common_words = [(int(index), word) for index, word in common_words]
52
+
53
+ # for idx, (sentence, score) in enumerate(sentences.items(), start=1):
54
+ # sentence_with_idx = f"{idx}. {sentence}"
55
+ # highlighted_sentence = sentence_with_idx
56
+
57
+ # for index, word in sanitized_common_words:
58
+ # if word not in color_map:
59
+ # color_map[word] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
60
+ # color_index += 1
61
+ # escaped_word = re.escape(word)
62
+ # pattern = rf'\b{escaped_word}\b'
63
+ # highlighted_sentence = re.sub(
64
+ # pattern,
65
+ # lambda m, idx=index, color=color_map[word]: (
66
+ # f'<span style="background-color: {color}; font-weight: bold;'
67
+ # f' padding: 1px 2px; border-radius: 2px; position: relative;">'
68
+ # f'<span style="background-color: black; color: white; border-radius: 50%;'
69
+ # f' padding: 1px 3px; margin-right: 3px; font-size: 0.8em;">{idx}</span>'
70
+ # f'{m.group(0)}'
71
+ # f'</span>'
72
+ # ),
73
+ # highlighted_sentence,
74
+ # flags=re.IGNORECASE
75
+ # )
76
+ # highlighted_html.append(
77
+ # f'<div style="margin-bottom: 5px;">'
78
+ # f'{highlighted_sentence}'
79
+ # f'<div style="display: inline-block; margin-left: 5px; padding: 3px 5px; border-radius: 3px; background-color: white; font-size: 0.9em;">'
80
+ # f'Entailment Score: {score}</div></div>'
81
+ # )
82
+
83
+ # final_html = "<br>".join(highlighted_html)
84
+ # return f'''
85
+ # <div style="background-color: #ffffff; color: #374151;">
86
+ # <h3 style="margin-top: 0; font-size: 1em; color: #111827;">{title}</h3>
87
+ # <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
88
+ # </div>
89
+ # '''
90
+
91
+
92
  def highlight_common_words_dict(common_words, sentences, title):
93
  color_map = {}
94
  color_index = 0
 
108
  pattern,
109
  lambda m, idx=index, color=color_map[word]: (
110
  f'<span style="background-color: {color}; font-weight: bold;'
111
+ f' padding: 2px 4px; border-radius: 2px; position: relative;">'
112
  f'<span style="background-color: black; color: white; border-radius: 50%;'
113
+ f' padding: 2px 5px; margin-right: 5px;">{idx}</span>'
114
  f'{m.group(0)}'
115
  f'</span>'
116
  ),
117
  highlighted_sentence,
118
  flags=re.IGNORECASE
119
  )
120
+
121
+ # Append the sentence and entailment score to the HTML
122
  highlighted_html.append(
123
  f'<div style="margin-bottom: 5px;">'
124
  f'{highlighted_sentence}'
125
+ f'<div style="display: inline-block; margin-left: 10px; padding: 3px 8px; border-radius: 5px; background-color: #E5E7EB; font-size: 0.9em; color: #374151;">'
126
  f'Entailment Score: {score}</div></div>'
127
  )
128
 
129
+ final_html = "<br><br>".join(highlighted_html)
130
  return f'''
131
+ <div style="border: solid 1px #E5E7EB; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
132
  <h3 style="margin-top: 0; font-size: 1em; color: #111827;">{title}</h3>
133
  <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
134
  </div>
135
  '''
136
 
137
 
138
+
139
+
140
+
141
  def reparaphrased_sentences_html(sentences):
142
 
143
  formatted_sentences = []
 
157
  '''
158
 
159
 
160
+ # common_words = [(1, "highlight"), (2, "numbering")]
161
+ # sentences = ["This is a test to highlight words.", "Numbering is important for clarity."]
162
 
163
+ # # Test highlight_common_words
164
+ # highlighted_html = highlight_common_words(common_words, sentences, "Test Highlighting")
165
+ # print(highlighted_html)
166
 
167
+ # # Test highlight_common_words_dict
168
+ # sentences_with_scores = {"Highlight words in this text.": 0.95, "Number sentences for clarity.": 0.8}
169
+ # highlighted_html_dict = highlight_common_words_dict(common_words, sentences_with_scores, "Test Dict Highlighting")
170
+ # print(highlighted_html_dict)