import gradio as gr

import nltk
import simplemma
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.probability import FreqDist
from simplemma import text_lemmatizer

# Download the Punkt models required by sent_tokenize.
nltk.download('punkt')

# Path of the (Italian) source text to index.
file = "text.txt"

def get_lists(file):
  # Read the whole file and split it into sentences with NLTK's Italian Punkt model.
  with open(file, 'r', encoding='utf-8') as f:
    text = f.read()

  sent_tokenized_text = sent_tokenize(text, language='italian')
  # Keep a lowercased copy, aligned index-by-index, for case-insensitive matching.
  sent_tokenized_text_lower = [sent.lower() for sent in sent_tokenized_text]

  return sent_tokenized_text, sent_tokenized_text_lower

sentences, sentences_lower = get_lists(file)

def search_engine(target):
  # Case-insensitive substring search: scan the lowercased sentences and
  # collect the original-case sentences that contain the query.
  result = []
  for i, sent in enumerate(sentences_lower):
    if target.lower() in sent:
      result.append(sentences[i])

  if len(result) == 0:
    # "I did not find the word '<target>' in the texts."
    return f"Non ho trovato la parola '{target}' nei testi.\n"

  # Number the matching sentences, one per paragraph.
  stringed_results = ''
  for n, r in enumerate(result):
    stringed_results += str(n + 1) + ': ' + str(r) + '\n\n'

  # "I found N sentence(s) containing the word <target>."
  return f"""Ho trovato {len(result)} {"frasi" if len(result) > 1 else "frase"} in cui è presente la parola {target}.\n\n {stringed_results}"""

# Simple text-in / text-out Gradio interface around the search function.
demo = gr.Interface(fn=search_engine, inputs='text', outputs='text')
demo.launch()