minhdang14902 committed on
Commit d727a1f · verified · 1 Parent(s): 38cb1f9

Update app.py

Files changed (1)
  1. app.py +8 -9
app.py CHANGED
@@ -7,7 +7,6 @@ import nltk
 from transformers.models.roberta.modeling_roberta import *
 from transformers import RobertaForQuestionAnswering
 from nltk import word_tokenize
-import spacy
 import json
 import pandas as pd
 import re
@@ -21,7 +20,7 @@ phoBert_tokenizer = AutoTokenizer.from_pretrained('minhdang14902/PhoBert_Edu')
 chatbot_pipeline = pipeline("sentiment-analysis", model=phoBert_model, tokenizer=phoBert_tokenizer)
 
 # Load spaCy Vietnamese model
-nlp = spacy.load('vi_core_news_lg')
+# nlp = spacy.load('vi_core_news_lg')
 
 # Load intents from json file
 def load_json_file(filename):
@@ -57,13 +56,13 @@ num_labels = len(labels)
 id2label = {id: label for id, label in enumerate(labels)}
 label2id = {label: id for id, label in enumerate(labels)}
 
-def tokenize_with_spacy(text):
-    doc = nlp(text)
-    tokens = [token.text for token in doc]
-    tokenized_text = ' '.join(tokens)
-    tokenized_text = re.sub(r'(?<!\s)([.,?])', r' \1', tokenized_text)
-    tokenized_text = re.sub(r'([.,?])(?!\s)', r'\1 ', tokenized_text)
-    return tokenized_text
+# def tokenize_with_spacy(text):
+#     doc = nlp(text)
+#     tokens = [token.text for token in doc]
+#     tokenized_text = ' '.join(tokens)
+#     tokenized_text = re.sub(r'(?<!\s)([.,?])', r' \1', tokenized_text)
+#     tokenized_text = re.sub(r'([.,?])(?!\s)', r'\1 ', tokenized_text)
+#     return tokenized_text
 
 # Load Roberta model and tokenizer
 roberta_model_checkpoint = "minhdang14902/Roberta_edu"
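
Note: this commit drops the spaCy dependency by commenting out the `vi_core_news_lg` load and the tokenize_with_spacy helper. Since app.py already imports word_tokenize from nltk, a minimal sketch of an equivalent tokenizer without spaCy might look like the following. The function name tokenize_with_nltk is hypothetical (not part of this commit); it reuses the punctuation-spacing regexes from the removed helper:

    import re
    import nltk
    from nltk import word_tokenize

    nltk.download('punkt')  # word_tokenize needs the punkt tokenizer data

    def tokenize_with_nltk(text):
        # Hypothetical drop-in for the removed tokenize_with_spacy helper:
        # tokenize with NLTK instead of the spaCy Vietnamese pipeline.
        tokens = word_tokenize(text)
        tokenized_text = ' '.join(tokens)
        # Same normalization as the removed code: ensure '.', ',' and '?'
        # have a space on each side when one is missing.
        tokenized_text = re.sub(r'(?<!\s)([.,?])', r' \1', tokenized_text)
        tokenized_text = re.sub(r'([.,?])(?!\s)', r'\1 ', tokenized_text)
        return tokenized_text

For example, tokenize_with_nltk('Xin chào, bạn khỏe không?') spaces the comma and question mark apart from the surrounding words, matching the whitespace-delimited input the removed helper produced.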