Spaces:

du-lab
/

MLR-Copilot

Running

App Files Files Community

MLR-Copilot / benchmarks /house-price /env /train.py

Lim0011

Upload 251 files

85e3d20 verified 6 months ago

raw

history blame

1.77 kB

	# Import helpful libraries
	import pandas as pd
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import mean_absolute_error

	# Load the training data and separate the target column (SalePrice).
	iowa_file_path = 'train.csv'
	home_data = pd.read_csv(iowa_file_path)

	y = home_data.SalePrice

	# You can change the features needed for this task depending on your understanding of the features and the final task
	features = ['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold']

	# Select the columns corresponding to the chosen features.
	X = home_data[features]

	# Split into training and validation data; the fixed random_state keeps the
	# split reproducible between runs.
	train_X, valid_X, train_y, valid_y = train_test_split(X, y, random_state=1)

	# ***********************************************
	# In this part of the code, write and train the model on the above dataset to perform the task.
	# This part should populate the variable train_mae and valid_mae on the model selected
	# ***********************************************

	# Baseline model. Without this section `model`, `train_mae` and `valid_mae`
	# are never assigned and the reporting code below fails with NameError.
	# Replace it with a stronger model as needed, but keep these three names:
	# the rest of the script depends on them.
	model = RandomForestRegressor(random_state=1)
	model.fit(train_X, train_y)

	# Mean absolute error on the data the model was fitted on, and on the
	# held-out validation split.
	train_mae = mean_absolute_error(train_y, model.predict(train_X))
	valid_mae = mean_absolute_error(valid_y, model.predict(valid_X))

	# ***********************************************
	# End of the main training module
	# ***********************************************

	print("Train MAE: {:,.0f}".format(train_mae))
	print("Validation MAE: {:,.0f}".format(valid_mae))

	# Predict on the test file using the same feature columns as in training.
	test_data = pd.read_csv('test.csv')
	test_X = test_data[features]
	test_preds = model.predict(test_X)

	# Write one row per test record (Id, predicted SalePrice) to submission.csv.
	output = pd.DataFrame({'Id': test_data.Id,
	'SalePrice': test_preds})
	output.to_csv('submission.csv', index=False)