nandovallec committed on
Commit
d457c9f
1 Parent(s): 47eae45

Keep training

Browse files
Files changed (1) hide show
  1. recommender.py +28 -5
recommender.py CHANGED
@@ -5,6 +5,15 @@ import numpy as np
5
  import pandas as pd
6
 
7
 
 
 
 
 
 
 
 
 
 
8
  def inference_row(list_tid, ps_matrix):
9
  ps_matrix_norm = pp.normalize(ps_matrix, axis=1)
10
  length_tid = len(list_tid)
@@ -17,6 +26,9 @@ def inference_row(list_tid, ps_matrix):
17
 
18
  def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
19
  df_ps_train = pd.read_hdf('model/df_ps_train_new.hdf')
 
 
 
20
  sim_vector, sparse_row = inference_row(current_list, ps_matrix_row)
21
  sim_vector = sim_vector.toarray()[0].tolist()
22
 
@@ -55,7 +67,9 @@ def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
55
  if (n == K):
56
  break
57
 
58
- return new_list
 
 
59
 
60
 
61
  def inference_from_tid(list_tid, K=50, MAX_tid=10):
@@ -64,10 +78,19 @@ def inference_from_tid(list_tid, K=50, MAX_tid=10):
64
 
65
  with open(pickle_path, 'rb') as f:
66
  ps_matrix = pickle.load(f)
67
-
68
- ps_matrix_row = ps_matrix.tocsr()
69
-
70
- return get_best_tid(list_tid, ps_matrix.tocsr(), K, MAX_tid)
 
 
 
 
 
 
 
 
 
71
 
72
 
73
  def inference_from_uri(list_uri, K=50, MAX_tid=10):
 
5
  import pandas as pd
6
 
7
 
8
+ def add_row_train(df, list_tid):
9
+ new_pid_add = df.iloc[-1].name +1
10
+ list_tid_add = list_tid
11
+ list_pos_add = list(range(len(list_tid_add)))
12
+
13
+ df.loc[new_pid_add] = {'tid': list_tid_add,'pos': list_pos_add}
14
+ return df
15
+
16
+
17
  def inference_row(list_tid, ps_matrix):
18
  ps_matrix_norm = pp.normalize(ps_matrix, axis=1)
19
  length_tid = len(list_tid)
 
26
 
27
  def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
28
  df_ps_train = pd.read_hdf('model/df_ps_train_new.hdf')
29
+ df_ps_train_extra = pd.read_hdf('model/df_ps_train_extra.hdf')
30
+ df_ps_train = pd.concat([df_ps_train,df_ps_train_extra])
31
+
32
  sim_vector, sparse_row = inference_row(current_list, ps_matrix_row)
33
  sim_vector = sim_vector.toarray()[0].tolist()
34
 
 
67
  if (n == K):
68
  break
69
 
70
+ df_ps_train_extra = add_row_train(df_ps_train_extra, current_list)
71
+ df_ps_train_extra.to_hdf('model/df_ps_train_extra.hdf', key='abc')
72
+ return new_list, sparse_row
73
 
74
 
75
  def inference_from_tid(list_tid, K=50, MAX_tid=10):
 
78
 
79
  with open(pickle_path, 'rb') as f:
80
  ps_matrix = pickle.load(f)
81
+
82
+ with open("model/giantMatrix_extra.pickle",'rb') as f:
83
+ ps_matrix_extra = pickle.load(f)
84
+
85
+ ps_matrix = vstack((ps_matrix,ps_matrix_extra))
86
+
87
+ result, sparse_row = get_best_tid(list_tid, ps_matrix.tocsr(), K, MAX_tid)
88
+ ps_matrix_extra = vstack((ps_matrix_extra,sparse_row.todok()))
89
+
90
+ with open("model/giantMatrix_extra.pickle", 'wb') as f:
91
+ pickle.dump(ps_matrix_extra, f)
92
+
93
+ return result
94
 
95
 
96
  def inference_from_uri(list_uri, K=50, MAX_tid=10):