Update: add datasets to the container (Docker) environment
Files changed:
- .github/workflows/pipeline.yaml +15 -2
- Dockerfile +4 -0
- pipeline_schedule.ctl +1 -1
- training.py +2 -1
.github/workflows/pipeline.yaml
CHANGED
@@ -7,8 +7,8 @@ on:
   tags:
     - '*'
   schedule:
-    - cron: "0
-    #
+    - cron: "0 13 * * *"
+    # 20 - 7 = 13
 
 jobs:
   extraction_train_modeling:
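GitHub Actions evaluates cron expressions in UTC, so "0 13 * * *" fires daily at 13:00 UTC; the "# 20 - 7 = 13" comment reads as converting a 20:00 local run time in a UTC+7 zone down to UTC. A minimal sketch checking that conversion with the Python standard library (Asia/Jakarta as the UTC+7 zone is an assumption, not stated in the diff):

from datetime import datetime, timezone
from zoneinfo import ZoneInfo  # Python 3.9+

# GitHub Actions cron is always UTC: "0 13 * * *" -> 13:00 UTC daily.
utc_run = datetime(2024, 7, 28, 13, 0, tzinfo=timezone.utc)

# Assumption: the "20 - 7 = 13" comment refers to a UTC+7 zone such as Asia/Jakarta.
local_run = utc_run.astimezone(ZoneInfo("Asia/Jakarta"))
print(local_run.strftime("%H:%M %Z"))  # 20:00 WIB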
@@ -116,6 +116,19 @@ jobs:
           name: posttrained.zip
           overwrite: "true"
 
+      - name: Remove Temporary Files and Directories
+        if: env.match != 'true'
+        run: |
+          rm models.zip
+          rm pickles.zip
+          rm datasets.zip
+          rm posttrained.zip
+
+          rm -rf models
+          rm -rf pickles
+          rm -rf datasets
+          rm -rf posttrained
+
       - name: Commit changes
         if: env.match != 'true'
         run: |
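The new step deletes the downloaded archives and the directories they were extracted into before the commit step runs, so the artifacts are not committed back to the repository. A rough Python equivalent of the same cleanup, in case it ever moves into the pipeline code itself (the four artifact names are taken from the step above):

import shutil
from pathlib import Path

# Same effect as the workflow step: drop each fetched archive
# and the directory it was extracted into.
for name in ("models", "pickles", "datasets", "posttrained"):
    Path(f"{name}.zip").unlink(missing_ok=True)   # rm <name>.zip
    shutil.rmtree(name, ignore_errors=True)       # rm -rf <name>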
Dockerfile
CHANGED
@@ -32,6 +32,10 @@ RUN --mount=type=secret,id=PICKLES_ID,mode=0444,required=true \
     gdown https://drive.google.com/uc?id=$(cat /run/secrets/PICKLES_ID) && \
     unzip pickles.zip && rm pickles.zip
 
+RUN --mount=type=secret,id=DATASETS_ID,mode=0444,required=true \
+    gdown https://drive.google.com/uc?id=$(cat /run/secrets/DATASETS_ID) && \
+    unzip datasets.zip && rm datasets.zip
+
 RUN --mount=type=secret,id=POSTTRAINED_ID,mode=0444,required=true \
     gdown https://drive.google.com/uc?id=$(cat /run/secrets/POSTTRAINED_ID) && \
     unzip posttrained.zip && rm posttrained.zip
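The new layer follows the existing PICKLES_ID/POSTTRAINED_ID pattern: a BuildKit secret holds a Google Drive file ID, gdown fetches the archive, and it is unpacked into the image. A hedged sketch of the same fetch-and-extract using gdown's Python API instead of the CLI (the secret path mirrors the Dockerfile; treat the exact calls as an assumption about gdown's current interface):

import zipfile
from pathlib import Path

import gdown  # pip install gdown

# Assumption: the Drive file ID comes from the mounted BuildKit secret,
# as the Dockerfile does with $(cat /run/secrets/DATASETS_ID).
file_id = Path("/run/secrets/DATASETS_ID").read_text().strip()

gdown.download(id=file_id, output="datasets.zip", quiet=False)

with zipfile.ZipFile("datasets.zip") as zf:
    zf.extractall(".")          # unzip datasets.zip
Path("datasets.zip").unlink()   # rm datasets.zip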
pipeline_schedule.ctl
CHANGED
@@ -1 +1 @@
-2024-07-
+2024-07-28
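pipeline_schedule.ctl appears to be a plain date stamp that the workflow compares against the current date to set env.match, the guard on the steps above. That comparison logic is not shown in this diff; a purely speculative sketch of what such a check could look like:

from datetime import datetime, timedelta, timezone
from pathlib import Path

# Speculative: compare the stamp in pipeline_schedule.ctl against today's
# date in UTC+7 (matching the cron comment) to flag an already-handled run.
stamp = Path("pipeline_schedule.ctl").read_text().strip()
today = datetime.now(timezone(timedelta(hours=7))).date().isoformat()
match = "true" if stamp == today else "false"
print(f"match={match}")  # e.g. appended to $GITHUB_ENV in a workflow step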
training.py
CHANGED
@@ -138,7 +138,8 @@ def main():
         dataframe = pd.read_csv(os.path.join(datasets_path, dataset), index_col='Date')[['Close']]
         model_file = os.path.join(models_path, f"{dataset.replace('.csv', '')}.keras")
 
-        dataframe = data_processor.preprocess_data(dataframe)
+        # dataframe = data_processor.preprocess_data(dataframe)
+        dataframe.dropna(inplace = True)
         standard_scaler, dataframe = data_processor.scale_data(dataframe, StandardScaler)
         minmax_scaler, dataframe = data_processor.scale_data(dataframe, MinMaxScaler)
 
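The commit comments out the data_processor.preprocess_data call and replaces it with a bare dropna, so rows with missing Close values are simply discarded before scaling. A minimal illustration of the effect on synthetic data (DataProcessor itself is not shown in this diff):

import pandas as pd

# Synthetic stand-in for one of the per-ticker CSVs read in main().
dataframe = pd.DataFrame(
    {"Close": [101.2, None, 103.5, 102.8]},
    index=pd.to_datetime(["2024-07-25", "2024-07-26", "2024-07-27", "2024-07-28"]),
)
dataframe.index.name = "Date"

# As in the commit: drop rows with missing values instead of the old
# preprocess_data step, then hand the frame to the scalers.
dataframe.dropna(inplace=True)
print(len(dataframe))  # 3 rows remain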