clui committed on
Commit
00bb291
verified
1 Parent(s): 6649309

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ import nltk
4
+
5
# Load the model and tokenizer.
model_name = "clui/opus-mt-it-pl"


@st.cache_resource
def _load_translation_model(name):
    """Load the tokenizer and seq2seq model for *name* once per process.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects avoids re-loading the weights on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(name),
        AutoModelForSeq2SeqLM.from_pretrained(name),
    )


tokenizer, model = _load_translation_model(model_name)

# Make sure the NLTK sentence-tokenizer resources are installed.
# NOTE(review): newer NLTK releases may require 'punkt_tab' instead — confirm
# against the installed NLTK version.
nltk.download('punkt')
12
+
13
def preprocess_text(input_text):
    """Collapse the input into one line, dropping blanks and bracketed headers.

    Every line is stripped of surrounding whitespace. Lines that are empty
    after stripping, or that start with ``[`` once stripped (header lines),
    are discarded; the remaining lines are joined with single spaces.

    Args:
        input_text: Raw, possibly multi-line text.

    Returns:
        The cleaned text as a single string; empty if nothing is left.
    """
    stripped_lines = (line.strip() for line in input_text.splitlines())
    # Test the stripped line so that an indented "[header]" is filtered too.
    return ' '.join(
        line for line in stripped_lines if line and not line.startswith('[')
    )
16
+
17
def translate_text(text):
    """Translate a piece of text with the module-level model and tokenizer.

    The input is tokenized with truncation at 512 tokens, and at most 200 new
    tokens are generated. Generation runs with sampling enabled
    (``do_sample=True`` with ``top_k``/``top_p``), so the output is not
    guaranteed to be identical across calls.

    Args:
        text: The text to translate.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    input_ids = encoded.input_ids
    generated = model.generate(
        input_ids,
        max_new_tokens=200,
        do_sample=True,
        top_k=30,
        top_p=0.95,
    )
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
22
+
23
def translate_long_text(long_text):
    """Translate a long text sentence by sentence.

    The text is split into sentences with ``nltk.sent_tokenize``, each
    sentence is passed to ``translate_text`` in order, and the translations
    are joined with single spaces.

    Args:
        long_text: The text to translate.

    Returns:
        The translated sentences joined into one string.
    """
    # NOTE(review): sent_tokenize is called without a language argument —
    # confirm the default suits the source language of the model.
    source_sentences = nltk.sent_tokenize(long_text)
    return ' '.join(map(translate_text, source_sentences))
28
+
29
# User interface.
# The Polish UI strings below were mis-decoded (UTF-8 bytes read with the
# wrong codec); they are restored to proper Polish text here.
st.title("Tłumacz")
text = st.text_area("Wprowadź tekst:")

if st.button("Tłumacz"):
    # Only preprocess when the user actually entered something.
    clean_text = preprocess_text(text) if text else ""
    if not text:
        st.warning("Proszę wprowadzić tekst do przetłumaczenia.")
    elif not clean_text:
        # Everything was filtered out (only blank or header lines).
        st.warning("Brak tekstu do przetłumaczenia po przetworzeniu.")
    else:
        translated_text = translate_long_text(clean_text)
        st.subheader("Przetłumaczone:")
        st.write(translated_text)