Commit 5446c65 by tebakaja
1 Parent(s): 39b1093

Update: add datasets in container (docker) environment

.github/workflows/pipeline.yaml CHANGED
@@ -7,8 +7,8 @@ on:
   tags:
   - '*'
   schedule:
- - cron: "0 3 * * *"
- # 8 - 7 = 1
+ - cron: "0 13 * * *"
+ # 20 - 7 = 13
 
  jobs:
   extraction_train_modeling:
@@ -116,6 +116,19 @@ jobs:
   name: posttrained.zip
   overwrite: "true"
 
+ - name: Remove Temporarary Files and Directories
+   if: env.match != 'true'
+   run: |
+     rm models.zip
+     rm pickles.zip
+     rm datasets.zip
+     rm posttrained.zip
+
+     rm -rf models
+     rm -rf pickles
+     rm -rf datasets
+     rm -rf posttrained
+
  - name: Commit changes
    if: env.match != 'true'
    run: |
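Note on the cron change: GitHub Actions evaluates schedule cron expressions in UTC, so the inline comment records a local-to-UTC conversion: a run meant for 20:00 in a UTC+7 timezone (presumably Asia/Jakarta; only the "20 - 7 = 13" arithmetic implies this, the workflow does not name a timezone) becomes 13:00 UTC. A minimal Python sketch of that conversion, with the timezone as an assumption:

from datetime import datetime
from zoneinfo import ZoneInfo  # Python 3.9+

# Assumed local timezone (UTC+7); inferred only from the "- 7" arithmetic in the workflow comment.
local_run = datetime(2024, 7, 28, 20, 0, tzinfo=ZoneInfo("Asia/Jakarta"))

# GitHub Actions interprets schedule cron expressions in UTC.
utc_run = local_run.astimezone(ZoneInfo("UTC"))
print(utc_run.strftime("%H:%M"))  # 13:00 -> matches cron "0 13 * * *"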
Dockerfile CHANGED
@@ -32,6 +32,10 @@ RUN --mount=type=secret,id=PICKLES_ID,mode=0444,required=true \
   gdown https://drive.google.com/uc?id=$(cat /run/secrets/PICKLES_ID) && \
   unzip pickles.zip && rm pickles.zip
 
+ RUN --mount=type=secret,id=DATASETS_ID,mode=0444,required=true \
+   gdown https://drive.google.com/uc?id=$(cat /run/secrets/DATASETS_ID) && \
+   unzip datasets.zip && rm datasets.zip
+
  RUN --mount=type=secret,id=POSTTRAINED_ID,mode=0444,required=true \
   gdown https://drive.google.com/uc?id=$(cat /run/secrets/POSTTRAINED_ID) && \
   unzip posttrained.zip && rm posttrained.zip
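The new DATASETS_ID step follows the same pattern as the existing ones: a BuildKit secret mount exposes the Google Drive file id at /run/secrets/DATASETS_ID only for the duration of that RUN instruction, so the id is not baked into an image layer. A rough Python sketch of what the single RUN line does (the secret path and archive name come from the Dockerfile; gdown is the same tool, here called through its Python API instead of its CLI):

import os
import zipfile

import gdown  # same downloader the Dockerfile invokes on the command line

# Secret file as mounted by --mount=type=secret,id=DATASETS_ID (available at build time only)
with open("/run/secrets/DATASETS_ID") as handle:
    file_id = handle.read().strip()

# Equivalent of: gdown https://drive.google.com/uc?id=$FILE_ID
archive = gdown.download(f"https://drive.google.com/uc?id={file_id}", "datasets.zip", quiet=False)

# Equivalent of: unzip datasets.zip && rm datasets.zip
with zipfile.ZipFile(archive) as bundle:
    bundle.extractall(".")
os.remove(archive)

At build time the id would be supplied as a BuildKit secret (for example via docker build's --secret id=DATASETS_ID,src=... flag), matching how the other secrets in this Dockerfile are already provided.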
pipeline_schedule.ctl CHANGED
@@ -1 +1 @@
- 2024-07-27
+ 2024-07-28
training.py CHANGED
@@ -138,7 +138,8 @@ def main():
   dataframe = pd.read_csv(os.path.join(datasets_path, dataset), index_col='Date')[['Close']]
   model_file = os.path.join(models_path, f"{dataset.replace('.csv', '')}.keras")
 
- dataframe = data_processor.preprocess_data(dataframe)
+ # dataframe = data_processor.preprocess_data(dataframe)
+ dataframe.dropna(inplace = True)
   standard_scaler, dataframe = data_processor.scale_data(dataframe, StandardScaler)
   minmax_scaler, dataframe = data_processor.scale_data(dataframe, MinMaxScaler)
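The training change swaps the data_processor.preprocess_data call for a plain dropna, so rows with missing 'Close' values are discarded before scaling. A standalone sketch of the resulting flow, using an in-memory frame in place of the CSV and plain scikit-learn scalers as an assumed stand-in for data_processor.scale_data (whose internals are not shown in this diff):

import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Stand-in for one dataset read in the loop (index_col='Date', single 'Close' column).
dataframe = pd.DataFrame(
    {"Close": [101.2, None, 103.5, 104.1]},
    index=pd.to_datetime(["2024-07-24", "2024-07-25", "2024-07-26", "2024-07-27"]),
)

# preprocess_data() is commented out in this commit; rows with missing values are dropped instead.
dataframe.dropna(inplace=True)

# Scaling step; assumed equivalent of data_processor.scale_data(dataframe, <Scaler>).
standard_scaler = StandardScaler()
dataframe[["Close"]] = standard_scaler.fit_transform(dataframe[["Close"]])

minmax_scaler = MinMaxScaler()
dataframe[["Close"]] = minmax_scaler.fit_transform(dataframe[["Close"]])

print(dataframe)  # three remaining rows, 'Close' rescaled into [0, 1]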