Upload folder using huggingface_hub
Browse files- README.md +3 -1
- highlighter.py +44 -0
- lcs.py +16 -0
- sampling_methods.py +32 -1
README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
app_file: app.py
|
4 |
sdk: gradio
|
5 |
sdk_version: 4.36.0
|
6 |
---
|
|
|
|
|
|
1 |
---
|
2 |
+
title: aiisc-watermarking-model
|
3 |
app_file: app.py
|
4 |
sdk: gradio
|
5 |
sdk_version: 4.36.0
|
6 |
---
|
7 |
+
|
8 |
+
Clone the repository and ``cd`` into it. Run ``gradio app.py`` to start the server.
|
highlighter.py
CHANGED
@@ -38,6 +38,50 @@ def highlight_common_words(common_words, sentences, title):
|
|
38 |
</div>
|
39 |
'''
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
import re
|
43 |
|
|
|
38 |
</div>
|
39 |
'''
|
40 |
|
41 |
+
# import re
|
42 |
+
|
43 |
+
# def highlight_common_words_dict(common_words, sentences, title, bg_color):
|
44 |
+
# color_map = {}
|
45 |
+
# color_index = 0
|
46 |
+
# highlighted_html = []
|
47 |
+
|
48 |
+
# for idx, (sentence, score) in enumerate(sentences.items(), start=1):
|
49 |
+
# sentence_with_idx = f"{idx}. {sentence}"
|
50 |
+
# highlighted_sentence = sentence_with_idx
|
51 |
+
|
52 |
+
# for index, word in common_words:
|
53 |
+
# if word not in color_map:
|
54 |
+
# color_map[word] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
|
55 |
+
# color_index += 1
|
56 |
+
# escaped_word = re.escape(word)
|
57 |
+
# pattern = rf'\b{escaped_word}\b'
|
58 |
+
# highlighted_sentence = re.sub(
|
59 |
+
# pattern,
|
60 |
+
# lambda m, idx=index, color=color_map[word]: (
|
61 |
+
# f'<span style="background-color: {color}; font-weight: bold;'
|
62 |
+
# f' padding: 1px 2px; border-radius: 2px; position: relative;">'
|
63 |
+
# f'<span style="background-color: black; color: white; border-radius: 50%;'
|
64 |
+
# f' padding: 1px 3px; margin-right: 3px; font-size: 0.8em;">{idx}</span>'
|
65 |
+
# f'{m.group(0)}'
|
66 |
+
# f'</span>'
|
67 |
+
# ),
|
68 |
+
# highlighted_sentence,
|
69 |
+
# flags=re.IGNORECASE
|
70 |
+
# )
|
71 |
+
# highlighted_html.append(
|
72 |
+
# f'<div style="margin-bottom: 5px;">'
|
73 |
+
# f'{highlighted_sentence}'
|
74 |
+
# f'<div style="display: inline-block; margin-left: 5px; border: 1px solid #ddd; padding: 3px 5px; border-radius: 3px; background-color: white; font-size: 0.9em;">'
|
75 |
+
# f'Entailment Score: {score}</div></div>'
|
76 |
+
# )
|
77 |
+
|
78 |
+
# final_html = "<br>".join(highlighted_html)
|
79 |
+
# return f'''
|
80 |
+
# <div style="border: solid 1px #; padding: 16px; background-color: {bg_color}; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
|
81 |
+
# <h3 style="margin-top: 0; font-size: 1em; color: #111827;">{title}</h3>
|
82 |
+
# <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
|
83 |
+
# </div>
|
84 |
+
# '''
|
85 |
|
86 |
import re
|
87 |
|
lcs.py
CHANGED
@@ -39,3 +39,19 @@ def find_common_subsequences(sentence, str_list):
|
|
39 |
|
40 |
return indexed_common_grams
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
return indexed_common_grams
|
41 |
|
42 |
+
# Example usage: one reference sentence and ten paraphrases of it.
# The data stays at module level so the names remain importable; only the
# demo call below is guarded.
sentence = "Billie Eilish, Charli XCX and Lorde are among a group of young female pop stars who are revealing, in their music, the pressure they have felt to look thin in a time of especially punishing beauty standards."
str_list = [
    'Young female pop stars, such as Billie Eilish, Charli XCX, and Lorde, are using their music to showcase how they have been subjected to harsh beauty standards in recent times.',
    'A group of young female pop stars, such as Billie Eilish, Charli XCX, and Lorde, are using their music to showcase how they have been subjected to harsh beauty standards in recent times.',
    'The music of a group of young female pop stars, such as Billie Eilish, Charli XCX and Lorde, is revealing the pressure to appear slim in an age where beauty is highly regulated.',
    'Through their songs, young female pop stars like Billie Eilish, Charli XCX and Lordé reveal the pressure they have been subjected to in order to appear attractive in an age of strict beauty standards.',
    'A number of female pop stars, including Billie Eilish, Charli XCx, and Lorde, are using their music to showcase the pressure they have experienced in order not to look unappealing in an age where beauty is highly valued.',
    "Some young female pop stars, such as Billie Eilish from the R&B Music Hall of Fame in Las Vegas and Charli XCX from Lorde from Manchester's Outer Banks, are using their music to showcase how they have been subjected to harsh beauty standards",
    'Among the group of young female pop stars who are using their music to showcase their unappealing appearance, are Billié Eilish and Charli XCX while Lorde is currently struggling to maintain her attractive looks.',
    'Young female pop icons such as Billie Eilish, Charliile XCX and Lorde are using their music to showcase the pressure they have been subjected too harshly to look attractive in an age where beauty is highly regulated.',
    'Billie Eilish, Charliile XCX and Lordé are just some of the young female pop stars who have been exposing their looks to music in an age where beauty standards are particularly hard to find.',
    'In the music industry, young female pop stars like Billie Eilish and Charliile XCx (with an average height of 160cm), as well as Lorde, are displaying how they have been subjected to harsh beauty standards.'
]

if __name__ == "__main__":
    # Run the demo only when this file is executed as a script, so that
    # importing the module does not compute and print the example output.
    print(find_common_subsequences(sentence, str_list))
|
sampling_methods.py
CHANGED
@@ -129,4 +129,35 @@ def exponential_minimum_sampling(original_sentence, paraphrased_sentences):
|
|
129 |
f"Paraphrased Sentence {idx+1}": sentence,
|
130 |
"Common Substrings": common_substrings
|
131 |
})
|
132 |
-
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
f"Paraphrased Sentence {idx+1}": sentence,
|
130 |
"Common Substrings": common_substrings
|
131 |
})
|
132 |
+
return results
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
#---------------------------------------------------------------------------
|
137 |
+
# Aryan's implementation; please refactor it as you see fit.
|
138 |
+
|
139 |
+
import torch
|
140 |
+
import random
|
141 |
+
|
142 |
+
def sample_word(words, logits, sampling_technique='inverse_transform', temperature=1.0):
    """Select one entry of ``words`` using the scores in ``logits``.

    Args:
        words: Indexable sequence of candidates; the element at the sampled
            position is returned.
        logits: Unnormalised scores, one per candidate (a list or a tensor).
            Assumed to be one-dimensional and aligned with ``words`` --
            TODO confirm against callers.
        sampling_technique: One of ``'inverse_transform'``,
            ``'exponential_minimum'``, ``'temperature'`` or ``'greedy'``.
        temperature: Divisor applied to the logits before the softmax. Only
            used by the ``'temperature'`` technique.

    Returns:
        The element of ``words`` at the sampled index.

    Raises:
        ValueError: If ``sampling_technique`` is not one of the supported
            names, or if ``temperature`` is not positive when the
            ``'temperature'`` technique is selected.
    """
    scores = torch.as_tensor(logits)
    # softmax is not defined for integer tensors, so promote them to float.
    if not torch.is_floating_point(scores):
        scores = scores.float()

    if sampling_technique == 'inverse_transform':
        probs = torch.softmax(scores, dim=-1)
        cumulative_probs = torch.cumsum(probs, dim=-1)
        random_prob = random.random()
        # The number of cumulative values strictly below the draw equals the
        # first index whose cumulative value is >= the draw.
        sampled_index = int((cumulative_probs < random_prob).sum())
        # Rounding can leave the final cumulative value slightly below 1.0;
        # clamp so a draw above it still maps to the last candidate.
        sampled_index = min(sampled_index, cumulative_probs.numel() - 1)
    elif sampling_technique == 'exponential_minimum':
        probs = torch.softmax(scores, dim=-1)
        uniform = torch.rand_like(probs)
        # Exponential-minimum rule: -log(u_i) / p_i is an exponential variable
        # with rate p_i, and the index of the smallest one is distributed
        # according to p. (Taking argmax of u_i / p_i would instead favour
        # the least likely candidates.)
        sampled_index = int(torch.argmin(-torch.log(uniform) / probs))
    elif sampling_technique == 'temperature':
        if temperature <= 0:
            raise ValueError("temperature must be a positive number.")
        probs = torch.softmax(scores / temperature, dim=-1)
        sampled_index = torch.multinomial(probs, 1).item()
    elif sampling_technique == 'greedy':
        sampled_index = torch.argmax(scores).item()
    else:
        raise ValueError("Invalid sampling technique. Choose 'inverse_transform', 'exponential_minimum', 'temperature', or 'greedy'.")

    return words[sampled_index]
|