File size: 950 Bytes
85e3d20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import subprocess
import pandas as pd
import random

# Kaggle competition slug; used in the consent URL, the CLI download command
# and the name of the downloaded archive.
taskname = "home-data-for-ml-course"
# Directory the competition files are downloaded and unpacked into. Relative
# paths are resolved against the current working directory, so run the script
# from the directory it lives in.
download_dir = "../env"
# Directory that receives answer.csv (the held-out labels).
script_dir = "."

# Pause so the user can open the competition page before the download starts
# (presumably the Kaggle CLI refuses to download until the rules have been
# accepted -- confirm). input() only returns once Enter is pressed, so the
# prompt says so.
input(f"Consent to the competition at https://www.kaggle.com/competitions/{taskname}/data; Press Enter to continue")

# Fetch the competition archive into download_dir, unpack it and delete the
# archive. check=True raises CalledProcessError as soon as one step fails, so
# the script never carries on with missing or stale files.
subprocess.run(["kaggle", "competitions", "download", "-c", taskname], cwd=download_dir, check=True)
# -o overwrites files that already exist. train.csv is rewritten later in this
# script with a subset of its rows, so a re-run must restore the pristine copy
# from the archive; with -n the truncated file would be kept and split again.
subprocess.run(["unzip", "-o", f"{taskname}.zip"], cwd=download_dir, check=True)
subprocess.run(["rm", f"{taskname}.zip"], cwd=download_dir, check=True)
# NOTE: left disabled on purpose. Without a shell the "*.csv.gz" glob is not
# expanded, so enabling this line as written would not remove anything.
# subprocess.run(["rm", "-rf", "*.csv.gz"], cwd=download_dir)

# Carve a local hold-out set out of the downloaded training data: the first
# 80% of the rows (in file order, no shuffling) stay in train.csv and the
# remaining rows become the test split.
trainset = pd.read_csv(f"{download_dir}/train.csv")
trainset = trainset.reset_index(drop=True)
split_at = int(len(trainset) * 0.8)  # row position where the hold-out begins

# Overwrites the downloaded train.csv with the reduced training portion.
trainset.iloc[:split_at].to_csv(f"{download_dir}/train.csv", index=False)
testset = trainset.iloc[split_at:]

# answer.csv keeps only the first and last columns of the hold-out rows
# (presumably the row id and the SalePrice target -- confirm against the data).
testset.drop(columns=list(trainset.columns[1:-1])).to_csv(f"{script_dir}/answer.csv", index=False)
# test.csv is the hold-out rows with the target column removed. to_csv returns
# None when given a path, so its result is deliberately not assigned.
testset.drop(columns=["SalePrice"]).to_csv(f"{download_dir}/test.csv", index=False)