Nathan Butters committed on
Commit
4a74d0b
·
1 Parent(s): d82212a

optimize nltk

Browse files
Files changed (3) hide show
  1. .ipynb_checkpoints/app-checkpoint.py +5 -2
  2. NLselector.py +6 -6
  3. app.py +5 -2
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -42,8 +42,11 @@ def prepare_model():
42
 
43
  @st.experimental_singleton
44
  def prepare_lists():
45
- nltk.download('omw-1.4')
46
- nltk.download('wordnet')
 
 
 
47
  countries = pd.read_csv("Assets/Countries/combined-countries.csv")
48
  professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
49
  word_lists = [list(countries.Words.apply(lambda x: x.lower())),list(professions.Words)]
 
42
 
43
  @st.experimental_singleton
44
  def prepare_lists():
45
+ try:
46
+ wordnet.synsets("bias")
47
+ except:
48
+ nltk.download('omw-1.4')
49
+ nltk.download('wordnet')
50
  countries = pd.read_csv("Assets/Countries/combined-countries.csv")
51
  professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
52
  word_lists = [list(countries.Words.apply(lambda x: x.lower())),list(professions.Words)]
NLselector.py CHANGED
@@ -181,9 +181,9 @@ def abs_dif(df,seed):
181
 
182
  text2 = Nearest Prediction
183
  text3 = Farthest Prediction'''
184
- #seed = process_text(seed)
185
- target = df[df['Words'] == seed].pred.iloc[0]
186
- sub_df = df[df['Words'] != seed].reset_index()
187
  nearest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[:1]]
188
  farthest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[-1:]]
189
  text2 = sub_df.text.iloc[nearest_prediction.index[0]]
@@ -207,15 +207,15 @@ def sampled_alts(df, seed, fixed=False):
207
  def gen_cf_country(df,_document,selection):
208
  df['text'] = df.Words.apply(lambda x: re.sub(r'\b'+selection+r'\b',x,_document.text))
209
  df['pred'] = df.text.apply(eval_pred)
210
- df['seed'] = df.Words.apply(lambda x: 'seed' if x == selection else 'alternative')
211
  df['similarity'] = df.Words.apply(lambda x: nlp(selection).similarity(nlp(x)))
212
  return df
213
 
214
  def gen_cf_profession(df,_document,selection):
215
- category = df.loc[df['Words'] == selection, 'Major'].iloc[0]
216
  df = df[df.Major == category]
217
  df['text'] = df.Words.apply(lambda x: re.sub(r'\b'+selection+r'\b',x,_document.text))
218
  df['pred'] = df.text.apply(eval_pred)
219
- df['seed'] = df.Words.apply(lambda x: 'seed' if x == selection else 'alternative')
220
  df['similarity'] = df.Words.apply(lambda x: nlp(selection).similarity(nlp(x)))
221
  return df
 
181
 
182
  text2 = Nearest Prediction
183
  text3 = Farthest Prediction'''
184
+ seed = process_text(seed)
185
+ target = df[df['Words'].str.lower() == seed].pred.iloc[0]
186
+ sub_df = df[df['Words'].str.lower() != seed].reset_index()
187
  nearest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[:1]]
188
  farthest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[-1:]]
189
  text2 = sub_df.text.iloc[nearest_prediction.index[0]]
 
207
  def gen_cf_country(df,_document,selection):
208
  df['text'] = df.Words.apply(lambda x: re.sub(r'\b'+selection+r'\b',x,_document.text))
209
  df['pred'] = df.text.apply(eval_pred)
210
+ df['seed'] = df.Words.apply(lambda x: 'seed' if x.lower() == selection.lower() else 'alternative')
211
  df['similarity'] = df.Words.apply(lambda x: nlp(selection).similarity(nlp(x)))
212
  return df
213
 
214
  def gen_cf_profession(df,_document,selection):
215
+ category = df.loc[df['Words'] == selection.lower(), 'Major'].iloc[0]
216
  df = df[df.Major == category]
217
  df['text'] = df.Words.apply(lambda x: re.sub(r'\b'+selection+r'\b',x,_document.text))
218
  df['pred'] = df.text.apply(eval_pred)
219
+ df['seed'] = df.Words.apply(lambda x: 'seed' if x == selection.lower() else 'alternative')
220
  df['similarity'] = df.Words.apply(lambda x: nlp(selection).similarity(nlp(x)))
221
  return df
app.py CHANGED
@@ -42,8 +42,11 @@ def prepare_model():
42
 
43
  @st.experimental_singleton
44
  def prepare_lists():
45
- nltk.download('omw-1.4')
46
- nltk.download('wordnet')
 
 
 
47
  countries = pd.read_csv("Assets/Countries/combined-countries.csv")
48
  professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
49
  word_lists = [list(countries.Words.apply(lambda x: x.lower())),list(professions.Words)]
 
42
 
43
  @st.experimental_singleton
44
  def prepare_lists():
45
+ try:
46
+ wordnet.synsets("bias")
47
+ except:
48
+ nltk.download('omw-1.4')
49
+ nltk.download('wordnet')
50
  countries = pd.read_csv("Assets/Countries/combined-countries.csv")
51
  professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
52
  word_lists = [list(countries.Words.apply(lambda x: x.lower())),list(professions.Words)]