Spaces:
Runtime error
Runtime error
updated code
Browse files
- .ipynb_checkpoints/utils-checkpoint.py +7 -5
- utils.py +7 -5
.ipynb_checkpoints/utils-checkpoint.py
CHANGED
@@ -57,11 +57,12 @@ def get_multiple_predictions(csv):
|
|
57 |
df = pd.read_csv(csv)
|
58 |
df.columns = ['sequence']
|
59 |
|
60 |
-
df['
|
61 |
-
df['
|
|
|
62 |
|
63 |
# Remove OOV words
|
64 |
-
df['sequence_clean'] = df['
|
65 |
|
66 |
# Remove rows with blank string
|
67 |
invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
|
@@ -79,16 +80,17 @@ def get_multiple_predictions(csv):
|
|
79 |
|
80 |
# Join back to original sequence
|
81 |
final_results = df.join(pred_results)
|
82 |
-
final_results.drop(columns=['sequence_clean'], inplace=True)
|
83 |
final_results['others'] = final_results[labels].max(axis=1)
|
84 |
final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
|
85 |
|
86 |
# Get sentiment labels
|
87 |
-
final_results['sentiment'] = final_results['
|
88 |
candidate_labels=['positive', 'negative'],
|
89 |
hypothesis_template='The sentiment of this is {}'))
|
90 |
)
|
91 |
|
|
|
|
|
92 |
# Append invalid rows
|
93 |
if len(invalid) == 0:
|
94 |
return final_results.to_csv(index=False).encode('utf-8')
|
|
|
57 |
df = pd.read_csv(csv)
|
58 |
df.columns = ['sequence']
|
59 |
|
60 |
+
df['sequence_clean'] = df['sequence'].str.lower() #lower case
|
61 |
+
df['sequence_clean'] = df['sequence_clean'].str.strip()
|
62 |
+
df['sequence_clean'] = df['sequence_clean'].str.replace('[^0-9a-zA-Z\s]','') #remove special char, punctuation
|
63 |
|
64 |
# Remove OOV words
|
65 |
+
df['sequence_clean'] = df['sequence_clean'].apply(lambda x: ' '.join([i for i in x.split() if i in w2v_vocab]))
|
66 |
|
67 |
# Remove rows with blank string
|
68 |
invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
|
|
|
80 |
|
81 |
# Join back to original sequence
|
82 |
final_results = df.join(pred_results)
|
|
|
83 |
final_results['others'] = final_results[labels].max(axis=1)
|
84 |
final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
|
85 |
|
86 |
# Get sentiment labels
|
87 |
+
final_results['sentiment'] = final_results['sequence_clean'].apply(lambda x: get_sentiment_label_facebook(classifier(x,
|
88 |
candidate_labels=['positive', 'negative'],
|
89 |
hypothesis_template='The sentiment of this is {}'))
|
90 |
)
|
91 |
|
92 |
+
final_results.drop(columns=['sequence_clean'], inplace=True)
|
93 |
+
|
94 |
# Append invalid rows
|
95 |
if len(invalid) == 0:
|
96 |
return final_results.to_csv(index=False).encode('utf-8')
|
utils.py
CHANGED
@@ -57,11 +57,12 @@ def get_multiple_predictions(csv):
|
|
57 |
df = pd.read_csv(csv)
|
58 |
df.columns = ['sequence']
|
59 |
|
60 |
-
df['
|
61 |
-
df['
|
|
|
62 |
|
63 |
# Remove OOV words
|
64 |
-
df['sequence_clean'] = df['
|
65 |
|
66 |
# Remove rows with blank string
|
67 |
invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
|
@@ -79,16 +80,17 @@ def get_multiple_predictions(csv):
|
|
79 |
|
80 |
# Join back to original sequence
|
81 |
final_results = df.join(pred_results)
|
82 |
-
final_results.drop(columns=['sequence_clean'], inplace=True)
|
83 |
final_results['others'] = final_results[labels].max(axis=1)
|
84 |
final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
|
85 |
|
86 |
# Get sentiment labels
|
87 |
-
final_results['sentiment'] = final_results['
|
88 |
candidate_labels=['positive', 'negative'],
|
89 |
hypothesis_template='The sentiment of this is {}'))
|
90 |
)
|
91 |
|
|
|
|
|
92 |
# Append invalid rows
|
93 |
if len(invalid) == 0:
|
94 |
return final_results.to_csv(index=False).encode('utf-8')
|
|
|
57 |
df = pd.read_csv(csv)
|
58 |
df.columns = ['sequence']
|
59 |
|
60 |
+
df['sequence_clean'] = df['sequence'].str.lower() #lower case
|
61 |
+
df['sequence_clean'] = df['sequence_clean'].str.strip()
|
62 |
+
df['sequence_clean'] = df['sequence_clean'].str.replace('[^0-9a-zA-Z\s]','') #remove special char, punctuation
|
63 |
|
64 |
# Remove OOV words
|
65 |
+
df['sequence_clean'] = df['sequence_clean'].apply(lambda x: ' '.join([i for i in x.split() if i in w2v_vocab]))
|
66 |
|
67 |
# Remove rows with blank string
|
68 |
invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
|
|
|
80 |
|
81 |
# Join back to original sequence
|
82 |
final_results = df.join(pred_results)
|
|
|
83 |
final_results['others'] = final_results[labels].max(axis=1)
|
84 |
final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
|
85 |
|
86 |
# Get sentiment labels
|
87 |
+
final_results['sentiment'] = final_results['sequence_clean'].apply(lambda x: get_sentiment_label_facebook(classifier(x,
|
88 |
candidate_labels=['positive', 'negative'],
|
89 |
hypothesis_template='The sentiment of this is {}'))
|
90 |
)
|
91 |
|
92 |
+
final_results.drop(columns=['sequence_clean'], inplace=True)
|
93 |
+
|
94 |
# Append invalid rows
|
95 |
if len(invalid) == 0:
|
96 |
return final_results.to_csv(index=False).encode('utf-8')
|