Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- app.py +0 -1
- highlighter.py +66 -14
app.py
CHANGED
@@ -56,7 +56,6 @@ class WatermarkingPipeline:
|
|
56 |
all_tokenized_sentences = []
|
57 |
all_tokenized_sentences.append(self.user_prompt_tokenized)
|
58 |
all_tokenized_sentences.extend(self.selected_sentences_tokenized)
|
59 |
-
all_tokenized_sentences.extend(self.discarded_sentences_tokenized)
|
60 |
|
61 |
self.common_grams = find_non_melting_points(all_tokenized_sentences)
|
62 |
|
|
|
56 |
all_tokenized_sentences = []
|
57 |
all_tokenized_sentences.append(self.user_prompt_tokenized)
|
58 |
all_tokenized_sentences.extend(self.selected_sentences_tokenized)
|
|
|
59 |
|
60 |
self.common_grams = find_non_melting_points(all_tokenized_sentences)
|
61 |
|
highlighter.py
CHANGED
@@ -42,6 +42,53 @@ def highlight_common_words(common_words, sentences, title):
|
|
42 |
|
43 |
import re
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
def highlight_common_words_dict(common_words, sentences, title):
|
46 |
color_map = {}
|
47 |
color_index = 0
|
@@ -61,31 +108,36 @@ def highlight_common_words_dict(common_words, sentences, title):
|
|
61 |
pattern,
|
62 |
lambda m, idx=index, color=color_map[word]: (
|
63 |
f'<span style="background-color: {color}; font-weight: bold;'
|
64 |
-
f' padding:
|
65 |
f'<span style="background-color: black; color: white; border-radius: 50%;'
|
66 |
-
f' padding:
|
67 |
f'{m.group(0)}'
|
68 |
f'</span>'
|
69 |
),
|
70 |
highlighted_sentence,
|
71 |
flags=re.IGNORECASE
|
72 |
)
|
|
|
|
|
73 |
highlighted_html.append(
|
74 |
f'<div style="margin-bottom: 5px;">'
|
75 |
f'{highlighted_sentence}'
|
76 |
-
f'<div style="display: inline-block; margin-left:
|
77 |
f'Entailment Score: {score}</div></div>'
|
78 |
)
|
79 |
|
80 |
-
final_html = "<br>".join(highlighted_html)
|
81 |
return f'''
|
82 |
-
<div style="background-color: #
|
83 |
<h3 style="margin-top: 0; font-size: 1em; color: #111827;">{title}</h3>
|
84 |
<div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
|
85 |
</div>
|
86 |
'''
|
87 |
|
88 |
|
|
|
|
|
|
|
89 |
def reparaphrased_sentences_html(sentences):
|
90 |
|
91 |
formatted_sentences = []
|
@@ -105,14 +157,14 @@ def reparaphrased_sentences_html(sentences):
|
|
105 |
'''
|
106 |
|
107 |
|
108 |
-
common_words = [(1, "highlight"), (2, "numbering")]
|
109 |
-
sentences = ["This is a test to highlight words.", "Numbering is important for clarity."]
|
110 |
|
111 |
-
# Test highlight_common_words
|
112 |
-
highlighted_html = highlight_common_words(common_words, sentences, "Test Highlighting")
|
113 |
-
print(highlighted_html)
|
114 |
|
115 |
-
# Test highlight_common_words_dict
|
116 |
-
sentences_with_scores = {"Highlight words in this text.": 0.95, "Number sentences for clarity.": 0.8}
|
117 |
-
highlighted_html_dict = highlight_common_words_dict(common_words, sentences_with_scores, "Test Dict Highlighting")
|
118 |
-
print(highlighted_html_dict)
|
|
|
42 |
|
43 |
import re
|
44 |
|
45 |
+
# def highlight_common_words_dict(common_words, sentences, title):
|
46 |
+
# color_map = {}
|
47 |
+
# color_index = 0
|
48 |
+
# highlighted_html = []
|
49 |
+
|
50 |
+
# # Ensure indices in common_words are integers
|
51 |
+
# sanitized_common_words = [(int(index), word) for index, word in common_words]
|
52 |
+
|
53 |
+
# for idx, (sentence, score) in enumerate(sentences.items(), start=1):
|
54 |
+
# sentence_with_idx = f"{idx}. {sentence}"
|
55 |
+
# highlighted_sentence = sentence_with_idx
|
56 |
+
|
57 |
+
# for index, word in sanitized_common_words:
|
58 |
+
# if word not in color_map:
|
59 |
+
# color_map[word] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
|
60 |
+
# color_index += 1
|
61 |
+
# escaped_word = re.escape(word)
|
62 |
+
# pattern = rf'\b{escaped_word}\b'
|
63 |
+
# highlighted_sentence = re.sub(
|
64 |
+
# pattern,
|
65 |
+
# lambda m, idx=index, color=color_map[word]: (
|
66 |
+
# f'<span style="background-color: {color}; font-weight: bold;'
|
67 |
+
# f' padding: 1px 2px; border-radius: 2px; position: relative;">'
|
68 |
+
# f'<span style="background-color: black; color: white; border-radius: 50%;'
|
69 |
+
# f' padding: 1px 3px; margin-right: 3px; font-size: 0.8em;">{idx}</span>'
|
70 |
+
# f'{m.group(0)}'
|
71 |
+
# f'</span>'
|
72 |
+
# ),
|
73 |
+
# highlighted_sentence,
|
74 |
+
# flags=re.IGNORECASE
|
75 |
+
# )
|
76 |
+
# highlighted_html.append(
|
77 |
+
# f'<div style="margin-bottom: 5px;">'
|
78 |
+
# f'{highlighted_sentence}'
|
79 |
+
# f'<div style="display: inline-block; margin-left: 5px; padding: 3px 5px; border-radius: 3px; background-color: white; font-size: 0.9em;">'
|
80 |
+
# f'Entailment Score: {score}</div></div>'
|
81 |
+
# )
|
82 |
+
|
83 |
+
# final_html = "<br>".join(highlighted_html)
|
84 |
+
# return f'''
|
85 |
+
# <div style="background-color: #ffffff; color: #374151;">
|
86 |
+
# <h3 style="margin-top: 0; font-size: 1em; color: #111827;">{title}</h3>
|
87 |
+
# <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
|
88 |
+
# </div>
|
89 |
+
# '''
|
90 |
+
|
91 |
+
|
92 |
def highlight_common_words_dict(common_words, sentences, title):
|
93 |
color_map = {}
|
94 |
color_index = 0
|
|
|
108 |
pattern,
|
109 |
lambda m, idx=index, color=color_map[word]: (
|
110 |
f'<span style="background-color: {color}; font-weight: bold;'
|
111 |
+
f' padding: 2px 4px; border-radius: 2px; position: relative;">'
|
112 |
f'<span style="background-color: black; color: white; border-radius: 50%;'
|
113 |
+
f' padding: 2px 5px; margin-right: 5px;">{idx}</span>'
|
114 |
f'{m.group(0)}'
|
115 |
f'</span>'
|
116 |
),
|
117 |
highlighted_sentence,
|
118 |
flags=re.IGNORECASE
|
119 |
)
|
120 |
+
|
121 |
+
# Append the sentence and entailment score to the HTML
|
122 |
highlighted_html.append(
|
123 |
f'<div style="margin-bottom: 5px;">'
|
124 |
f'{highlighted_sentence}'
|
125 |
+
f'<div style="display: inline-block; margin-left: 10px; padding: 3px 8px; border-radius: 5px; background-color: #E5E7EB; font-size: 0.9em; color: #374151;">'
|
126 |
f'Entailment Score: {score}</div></div>'
|
127 |
)
|
128 |
|
129 |
+
final_html = "<br><br>".join(highlighted_html)
|
130 |
return f'''
|
131 |
+
<div style="border: solid 1px #E5E7EB; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
|
132 |
<h3 style="margin-top: 0; font-size: 1em; color: #111827;">{title}</h3>
|
133 |
<div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
|
134 |
</div>
|
135 |
'''
|
136 |
|
137 |
|
138 |
+
|
139 |
+
|
140 |
+
|
141 |
def reparaphrased_sentences_html(sentences):
|
142 |
|
143 |
formatted_sentences = []
|
|
|
157 |
'''
|
158 |
|
159 |
|
160 |
+
# common_words = [(1, "highlight"), (2, "numbering")]
|
161 |
+
# sentences = ["This is a test to highlight words.", "Numbering is important for clarity."]
|
162 |
|
163 |
+
# # Test highlight_common_words
|
164 |
+
# highlighted_html = highlight_common_words(common_words, sentences, "Test Highlighting")
|
165 |
+
# print(highlighted_html)
|
166 |
|
167 |
+
# # Test highlight_common_words_dict
|
168 |
+
# sentences_with_scores = {"Highlight words in this text.": 0.95, "Number sentences for clarity.": 0.8}
|
169 |
+
# highlighted_html_dict = highlight_common_words_dict(common_words, sentences_with_scores, "Test Dict Highlighting")
|
170 |
+
# print(highlighted_html_dict)
|