trtd56 committed on
Commit
77bdb05
·
verified ·
1 Parent(s): 2c6594f

Update script.py

Browse files
Files changed (1) hide show
  1. script.py +17 -5
script.py CHANGED
@@ -1,10 +1,22 @@
 
 
1
  import pandas as pd
 
2
 
3
- print("################################")
4
- import os
5
- os.listdir("/tmp/data")
6
  test_df = pd.read_csv("/tmp/data/test.csv")
7
- print("################################")
8
 
9
- df = pd.DataFrame([(f"testid{i:04}", 0) for i in range(837)], columns=["id", "pred"])
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  df.to_csv("submission.csv", index=None)
 
1
+ import pickle
2
+ import numpy as np
3
  import pandas as pd
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
 
 
 
 
6
  test_df = pd.read_csv("/tmp/data/test.csv")
 
7
 
8
+ with open("model.pkl", "rb") as f:
9
+ model = pickle.load(f)
10
+
11
+ scores = []
12
+ for _, row in test_df.iterrows():
13
+ X_query = model["tokenizer"].transform([row["Query"]])
14
+ is_cand = sum([(model["faq_ids"] == row[f"FAQ{i+1}"]).astype(int) for i in range(3)]) > 0
15
+ sim = cosine_similarity(X_query, model["X_faq"][is_cand])[0]
16
+ score = sim.max()
17
+ scores.append(score)
18
+
19
+ predict = (np.array(scores) > model["thr"]).astype(int)
20
+
21
+ df = pd.DataFrame([(f"testid{i:04}", v) for i, v in enumerate(predict)], columns=["id", "pred"])
22
  df.to_csv("submission.csv", index=None)