IinjyI committed
Commit: a9cf0ea
1 Parent(s): aee8616

Update app.py

Files changed (1):
1. app.py (+11 -1)
app.py CHANGED
@@ -14,6 +14,7 @@ from sklearn.metrics.pairwise import cosine_similarity
 
 import networkx as nx
 from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
+from summarizer import Summarizer, TransformerSummarizer
 
 nltk.download('punkt')
 nltk.download('stopwords')
@@ -34,6 +35,15 @@ model.load_weights("./marian_model/model.weights.h5")
 with open("cleaned_word_embeddings.pkl", "rb") as f:
     cleaned_word_embeddings = pickle.load(f)
 
+summ_model = TransformerSummarizer(transformer_type="XLNet", transformer_model_key="xlnet-base-cased")
+
+def translate_pretrained(text):
+    summarized = ''.join(summ_model(text))
+    tokenized = tokenizer([summarized], return_tensors="np")
+    out = model.generate(**tokenized)
+    arabic = tokenizer.decode(out[0], skip_special_tokens=True)
+    return arabic
+
 def get_clean_sentences(text):
     sentences = sent_tokenize(text)
     # Remove punctuations, numbers and special characters
@@ -116,6 +126,6 @@ def translate(text):
     arabic = tokenizer.decode(out[0], skip_special_tokens=True)
     return arabic
 
-demo = gr.Interface(fn=translate, inputs="text", outputs="text")
+demo = gr.Interface(fn=translate_pretrained, inputs="text", outputs="text")
 demo.launch(share=True)
 
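
For context: judging from the surrounding imports (networkx, cosine_similarity, the pickled word embeddings), the existing translate path runs a hand-rolled extractive summarizer before translating. This commit adds an alternative path that delegates summarization to an XLNet-backed TransformerSummarizer (the summarizer import is presumably the bert-extractive-summarizer package) and points the Gradio UI at it. A minimal standalone sketch of the new pipeline, assuming that package and using Helsinki-NLP/opus-mt-en-ar as a stand-in for the app's fine-tuned Marian weights in ./marian_model/model.weights.h5:

from summarizer import TransformerSummarizer
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM

# Assumed checkpoint: the real app loads its own fine-tuned weights; any
# English->Arabic MarianMT checkpoint fits the sketch (pass from_pt=True
# to from_pretrained if the checkpoint ships only PyTorch weights).
MODEL_NAME = "Helsinki-NLP/opus-mt-en-ar"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = TFAutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Extractive summarizer backed by XLNet (bert-extractive-summarizer)
summ_model = TransformerSummarizer(
    transformer_type="XLNet", transformer_model_key="xlnet-base-cased"
)

def translate_pretrained(text):
    # Summarize first, then translate only the summary
    summarized = "".join(summ_model(text))
    tokenized = tokenizer([summarized], return_tensors="np")
    out = model.generate(**tokenized)
    return tokenizer.decode(out[0], skip_special_tokens=True)

print(translate_pretrained("Some long English text to summarize and translate."))

Note the swap at the bottom of the diff: gr.Interface now serves translate_pretrained, so the original translate function stays in app.py but is no longer wired to the UI.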
131