Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,7 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
14 |
|
15 |
import networkx as nx
|
16 |
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
|
|
|
17 |
|
18 |
nltk.download('punkt')
|
19 |
nltk.download('stopwords')
|
@@ -34,6 +35,15 @@ model.load_weights("./marian_model/model.weights.h5")
|
|
34 |
with open("cleaned_word_embeddings.pkl", "rb") as f:
|
35 |
cleaned_word_embeddings = pickle.load(f)
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
def get_clean_sentences(text):
|
38 |
sentences = sent_tokenize(text)
|
39 |
# Remove punctuations, numbers and special characters
|
@@ -116,6 +126,6 @@ def translate(text):
|
|
116 |
arabic = tokenizer.decode(out[0], skip_special_tokens=True)
|
117 |
return arabic
|
118 |
|
119 |
-
demo = gr.Interface(fn=
|
120 |
demo.launch(share=True)
|
121 |
|
|
|
14 |
|
15 |
import networkx as nx
|
16 |
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
|
17 |
+
from summarizer import Summarizer,TransformerSummarizer
|
18 |
|
19 |
nltk.download('punkt')
|
20 |
nltk.download('stopwords')
|
|
|
35 |
with open("cleaned_word_embeddings.pkl", "rb") as f:
|
36 |
cleaned_word_embeddings = pickle.load(f)
|
37 |
|
38 |
+
summ_model = TransformerSummarizer(transformer_type="XLNet",transformer_model_key="xlnet-base-cased")
|
39 |
+
|
40 |
+
def translate_pretrained(text):
    """Summarize English *text* with the XLNet extractive summarizer,
    then translate the summary with the loaded Marian seq2seq model.

    Parameters
    ----------
    text : str
        Source text to condense and translate.

    Returns
    -------
    str
        Decoded translation of the summary (Arabic, per the model this
        app loads — confirm against the Marian checkpoint in use).
    """
    # summ_model returns the extractive summary pieces; stitch them
    # into one plain string before tokenizing.
    summary = "".join(summ_model(text))
    # NumPy tensors ("np") match the TF model's generate() inputs.
    batch = tokenizer([summary], return_tensors="np")
    generated = model.generate(**batch)
    # Decode only the first (and only) generated sequence, dropping
    # special tokens such as <pad>/<eos>.
    return tokenizer.decode(generated[0], skip_special_tokens=True)
|
46 |
+
|
47 |
def get_clean_sentences(text):
|
48 |
sentences = sent_tokenize(text)
|
49 |
# Remove punctuations, numbers and special characters
|
|
|
126 |
arabic = tokenizer.decode(out[0], skip_special_tokens=True)
|
127 |
return arabic
|
128 |
|
129 |
+
# Wire the summarize-then-translate pipeline into a minimal Gradio UI:
# one text input box, one text output box.
demo = gr.Interface(fn=translate_pretrained, inputs="text", outputs="text")
# share=True additionally publishes a temporary public URL beyond localhost.
demo.launch(share=True)
|
131 |
|