Spaces:
Running
Running
Nathan Butters
committed on
Commit
·
4a74d0b
1
Parent(s):
d82212a
optimize nltk
Browse files
- .ipynb_checkpoints/app-checkpoint.py +5 -2
- NLselector.py +6 -6
- app.py +5 -2
.ipynb_checkpoints/app-checkpoint.py
CHANGED
@@ -42,8 +42,11 @@ def prepare_model():
|
|
42 |
|
43 |
@st.experimental_singleton
|
44 |
def prepare_lists():
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
47 |
countries = pd.read_csv("Assets/Countries/combined-countries.csv")
|
48 |
professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
|
49 |
word_lists = [list(countries.Words.apply(lambda x: x.lower())),list(professions.Words)]
|
|
|
42 |
|
43 |
@st.experimental_singleton
|
44 |
def prepare_lists():
|
45 |
+
try:
|
46 |
+
wordnet.synsets("bias")
|
47 |
+
except:
|
48 |
+
nltk.download('omw-1.4')
|
49 |
+
nltk.download('wordnet')
|
50 |
countries = pd.read_csv("Assets/Countries/combined-countries.csv")
|
51 |
professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
|
52 |
word_lists = [list(countries.Words.apply(lambda x: x.lower())),list(professions.Words)]
|
NLselector.py
CHANGED
@@ -181,9 +181,9 @@ def abs_dif(df,seed):
|
|
181 |
|
182 |
text2 = Nearest Prediction
|
183 |
text3 = Farthest Prediction'''
|
184 |
-
|
185 |
-
target = df[df['Words'] == seed].pred.iloc[0]
|
186 |
-
sub_df = df[df['Words'] != seed].reset_index()
|
187 |
nearest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[:1]]
|
188 |
farthest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[-1:]]
|
189 |
text2 = sub_df.text.iloc[nearest_prediction.index[0]]
|
@@ -207,15 +207,15 @@ def sampled_alts(df, seed, fixed=False):
|
|
207 |
def gen_cf_country(df,_document,selection):
|
208 |
df['text'] = df.Words.apply(lambda x: re.sub(r'\b'+selection+r'\b',x,_document.text))
|
209 |
df['pred'] = df.text.apply(eval_pred)
|
210 |
-
df['seed'] = df.Words.apply(lambda x: 'seed' if x == selection else 'alternative')
|
211 |
df['similarity'] = df.Words.apply(lambda x: nlp(selection).similarity(nlp(x)))
|
212 |
return df
|
213 |
|
214 |
def gen_cf_profession(df,_document,selection):
|
215 |
-
category = df.loc[df['Words'] == selection, 'Major'].iloc[0]
|
216 |
df = df[df.Major == category]
|
217 |
df['text'] = df.Words.apply(lambda x: re.sub(r'\b'+selection+r'\b',x,_document.text))
|
218 |
df['pred'] = df.text.apply(eval_pred)
|
219 |
-
df['seed'] = df.Words.apply(lambda x: 'seed' if x == selection else 'alternative')
|
220 |
df['similarity'] = df.Words.apply(lambda x: nlp(selection).similarity(nlp(x)))
|
221 |
return df
|
|
|
181 |
|
182 |
text2 = Nearest Prediction
|
183 |
text3 = Farthest Prediction'''
|
184 |
+
seed = process_text(seed)
|
185 |
+
target = df[df['Words'].str.lower() == seed].pred.iloc[0]
|
186 |
+
sub_df = df[df['Words'].str.lower() != seed].reset_index()
|
187 |
nearest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[:1]]
|
188 |
farthest_prediction = sub_df.pred[(sub_df.pred-target).abs().argsort()[-1:]]
|
189 |
text2 = sub_df.text.iloc[nearest_prediction.index[0]]
|
|
|
207 |
def gen_cf_country(df, _document, selection):
    """Fill ``df`` with counterfactual texts built by swapping ``selection``.

    For every entry in ``df.Words`` a copy of ``_document.text`` is produced in
    which each whole-word occurrence of ``selection`` is replaced by that
    entry. Four columns are then written onto ``df`` in place:

    * ``text``       -- the rewritten document text
    * ``pred``       -- ``eval_pred`` applied to ``text``
    * ``seed``       -- ``'seed'`` when the entry equals ``selection``
                        (case-insensitive), otherwise ``'alternative'``
    * ``similarity`` -- ``nlp(selection).similarity(nlp(entry))``

    Args:
        df: DataFrame with a string ``Words`` column. It is mutated in place.
        _document: Object exposing the source text as ``.text``.
        selection: The word or phrase in the document to substitute.

    Returns:
        The same ``df`` object, with the columns above added.
    """
    source_text = _document.text
    # Escape the selection so names containing regex metacharacters
    # (".", "(", "+", ...) are matched literally instead of being read as
    # pattern syntax. Compiling once avoids rebuilding the pattern per row.
    seed_pattern = re.compile(r'\b' + re.escape(selection) + r'\b')
    seed_key = selection.lower()
    # Parsed once and reused for every row.
    # NOTE(review): assumes nlp() returns an equivalent object for identical
    # input on every call -- confirm.
    seed_doc = nlp(selection)

    # A callable replacement inserts each word verbatim; a plain string would
    # be parsed as a replacement template (backslashes, group references).
    df['text'] = df.Words.apply(
        lambda word: seed_pattern.sub(lambda _match: word, source_text)
    )
    df['pred'] = df.text.apply(eval_pred)
    df['seed'] = df.Words.apply(
        lambda word: 'seed' if word.lower() == seed_key else 'alternative'
    )
    df['similarity'] = df.Words.apply(
        lambda word: seed_doc.similarity(nlp(word))
    )
    return df
|
213 |
|
214 |
def gen_cf_profession(df, _document, selection):
    """Build counterfactual texts from professions in ``selection``'s group.

    The ``Major`` value of the row whose ``Words`` entry matches ``selection``
    (case-insensitive) is looked up, and only rows sharing that ``Major`` are
    kept. For each kept entry a copy of ``_document.text`` is produced with
    every whole-word occurrence of ``selection`` replaced by that entry. The
    resulting frame carries four added columns:

    * ``text``       -- the rewritten document text
    * ``pred``       -- ``eval_pred`` applied to ``text``
    * ``seed``       -- ``'seed'`` for the entry matching ``selection``,
                        otherwise ``'alternative'``
    * ``similarity`` -- ``nlp(selection).similarity(nlp(entry))``

    Args:
        df: DataFrame with string ``Words`` and ``Major`` columns. The
            caller's frame is not modified.
        _document: Object exposing the source text as ``.text``.
        selection: The word or phrase in the document to substitute.

    Returns:
        A new DataFrame restricted to the matching ``Major`` group, with the
        columns above added.

    Raises:
        IndexError: If ``selection`` does not appear in ``df['Words']``.
    """
    seed_key = selection.lower()
    # Compare case-insensitively on both sides, the same way the seed is
    # matched elsewhere in this module.
    seed_majors = df.loc[df['Words'].str.lower() == seed_key, 'Major']
    if seed_majors.empty:
        # Same exception type the bare .iloc[0] would raise, with a message
        # that says what was actually missing.
        raise IndexError(
            f"selection {selection!r} not found in the 'Words' column"
        )
    category = seed_majors.iloc[0]

    # Copy the filtered rows so the column assignments below write to an
    # independent frame rather than to a slice of the caller's data
    # (which triggers pandas' SettingWithCopyWarning).
    df = df[df.Major == category].copy()

    source_text = _document.text
    # Escape the selection so regex metacharacters in it are matched literally.
    seed_pattern = re.compile(r'\b' + re.escape(selection) + r'\b')
    # Parsed once and reused for every row.
    # NOTE(review): assumes nlp() returns an equivalent object for identical
    # input on every call -- confirm.
    seed_doc = nlp(selection)

    # A callable replacement inserts each word verbatim; a plain string would
    # be parsed as a replacement template (backslashes, group references).
    df['text'] = df.Words.apply(
        lambda word: seed_pattern.sub(lambda _match: word, source_text)
    )
    df['pred'] = df.text.apply(eval_pred)
    df['seed'] = (
        df['Words'].str.lower().eq(seed_key)
        .map({True: 'seed', False: 'alternative'})
    )
    df['similarity'] = df.Words.apply(
        lambda word: seed_doc.similarity(nlp(word))
    )
    return df
|
app.py
CHANGED
@@ -42,8 +42,11 @@ def prepare_model():
|
|
42 |
|
43 |
@st.experimental_singleton
|
44 |
def prepare_lists():
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
47 |
countries = pd.read_csv("Assets/Countries/combined-countries.csv")
|
48 |
professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
|
49 |
word_lists = [list(countries.Words.apply(lambda x: x.lower())),list(professions.Words)]
|
|
|
42 |
|
43 |
@st.experimental_singleton
|
44 |
def prepare_lists():
|
45 |
+
try:
|
46 |
+
wordnet.synsets("bias")
|
47 |
+
except:
|
48 |
+
nltk.download('omw-1.4')
|
49 |
+
nltk.download('wordnet')
|
50 |
countries = pd.read_csv("Assets/Countries/combined-countries.csv")
|
51 |
professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
|
52 |
word_lists = [list(countries.Words.apply(lambda x: x.lower())),list(professions.Words)]
|