Spaces:
Runtime error
Runtime error
File size: 1,182 Bytes
6616039 84028a4 c73d70f b096b37 84028a4 b096b37 84028a4 b096b37 84028a4 9147dc2 84028a4 9147dc2 84028a4 9147dc2 84028a4 82098b3 fa71355 3cba6d9 84028a4 fa71355 84028a4 35feef4 e4c6728 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import gradio as gr
import nltk
import simplemma
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.probability import FreqDist
from simplemma import text_lemmatizer
# sent_tokenize needs the Punkt sentence models. Older NLTK releases load the
# pickled 'punkt' package, while recent releases (NLTK 3.9+) look for the
# 'punkt_tab' package instead and raise LookupError when it is missing, so
# both are fetched to keep the app starting regardless of the installed version.
nltk.download('punkt')
nltk.download('punkt_tab')
# Path of the UTF-8 text file that is tokenized and searched.
file = "text.txt"
def get_lists(file, language='italian'):
    """Read a text file and split it into sentences.

    Args:
        file: Path of the UTF-8 encoded text file to read.
        language: Name of the Punkt model handed to ``sent_tokenize``.
            Defaults to ``'italian'``, the value that was previously
            hard-coded, so existing one-argument calls behave the same.

    Returns:
        A tuple ``(sentences, sentences_lower)``: the sentences as they
        appear in the text and, at the same positions, their lower-cased
        copies.
    """
    with open(file, 'r', encoding='utf-8') as f:
        text = f.read()
    sent_tokenized_text = sent_tokenize(text, language=language)
    # The lower-cased copies keep the same order as the originals, so an
    # index found in one list identifies the same sentence in the other.
    sent_tokenized_text_lower = [sent.lower() for sent in sent_tokenized_text]
    return sent_tokenized_text, sent_tokenized_text_lower
# Tokenize the text file once at import time; search_engine reads these two
# module-level lists (original and lower-cased sentences) on every call.
sentences, sentences_lower = get_lists(file)
def search_engine(target):
    """Return the sentences that contain ``target``, ignoring case.

    The search is a plain substring match against the module-level
    ``sentences_lower`` list; each hit is reported using the sentence at
    the same position in ``sentences``.

    Args:
        target: Text to look for, exactly as typed by the user.

    Returns:
        A message in Italian: a prompt to type something when the query is
        empty or only whitespace, a "not found" notice when no sentence
        matches, or otherwise the number of matches followed by the
        matching sentences numbered from 1.
    """
    # An empty string is a substring of every string, so an empty or
    # whitespace-only query would otherwise "match" the whole corpus.
    if not target or not target.strip():
        return "Inserisci una parola da cercare.\n"

    needle = target.lower()  # lower-case once instead of once per sentence
    result = [
        original
        for original, lowered in zip(sentences, sentences_lower)
        if needle in lowered
    ]
    if not result:
        return f"Non ho trovato la parola '{target}' nei testi.\n"

    stringed_results = ''.join(
        f"{n}: {r}\n\n" for n, r in enumerate(result, start=1)
    )
    noun = "frasi" if len(result) > 1 else "frase"
    return (
        f"Ho trovato {len(result)} {noun} in cui è presente la parola "
        f"{target}.\n\n {stringed_results}"
    )
# Wrap search_engine in a Gradio interface with one text input and one text
# output, then start serving it.
demo = gr.Interface(fn=search_engine, inputs='text', outputs='text')
demo.launch()
|