Spaces:
Runtime error
Runtime error
#!/usr/local/bin/python3 | |
# avenir-python: Machine Learning | |
# Author: Pranab Ghosh | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); you | |
# may not use this file except in compliance with the License. You may | |
# obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |
# implied. See the License for the specific language governing | |
# permissions and limitations under the License. | |
# Package imports | |
import os | |
import sys | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import sklearn as sk | |
import matplotlib | |
import random | |
import jprops | |
from io import StringIO | |
from sklearn.model_selection import cross_val_score | |
import joblib | |
from random import randint | |
from io import StringIO | |
from sklearn.linear_model import LinearRegression | |
sys.path.append(os.path.abspath("../lib")) | |
from util import * | |
from mlutil import * | |
from pasearch import * | |
class BaseRegressor(object):
    """
    base regression class

    Holds the configuration, the logger and the data / model state shared by
    the concrete regressors. Sub classes provide buildModel(), which has to
    set self.regressor to an estimator exposing fit, predict, score, coef_
    and intercept_, since those are what this class uses.
    """

    def __init__(self, configFile, defValues):
        """
        initializer

        Parameters
            configFile : configuration file path passed on to Configuration
            defValues : dictionary of default values supplied by the sub class;
                the common and training entries are added to it in place
        """
        # each entry is a (default value, message) pair; the message looks like
        # the error text for a missing mandatory value - confirm in Configuration
        defValues["common.mode"] = ("train", None)
        defValues["common.model.directory"] = ("model", None)
        defValues["common.model.file"] = (None, None)
        defValues["common.scale.file.path"] = (None, "missing scale file path")
        defValues["common.preprocessing"] = (None, None)
        defValues["common.verbose"] = (False, None)
        defValues["train.data.file"] = (None, "missing training data file")
        defValues["train.data.fields"] = (None, "missing training data field ordinals")
        defValues["train.data.feature.fields"] = (None, "missing training data feature field ordinals")
        defValues["train.data.out.field"] = (None, "missing out field ordinal")
        self.config = Configuration(configFile, defValues)

        self.featData = None
        self.outData = None
        self.regressor = None
        self.verbose = self.config.getBooleanConfig("common.verbose")[0]
        # the mode is a string such as "train", so it is read as a string and
        # not as a boolean
        self.mode = self.config.getStringConfig("common.mode")[0]

        logFilePath = self.config.getStringConfig("common.logging.file")[0]
        logLevName = self.config.getStringConfig("common.logging.level")[0]
        self.logger = createLogger(__name__, logFilePath, logLevName)
        self.logger.info("********* starting session")

    def initConfig(self, configFile, defValues):
        """
        initialize config, replacing the current configuration object
        """
        self.config = Configuration(configFile, defValues)

    def getConfig(self):
        """
        get config object
        """
        return self.config

    def setConfigParam(self, name, value):
        """
        set config param
        """
        self.config.setParam(name, value)

    def getMode(self):
        """
        get mode
        """
        return self.mode

    def buildModel(self):
        """
        builds model object, to be implemented by sub classes which have to
        set self.regressor
        """
        raise NotImplementedError("buildModel has to be implemented by the sub class")

    def getModelFilePath(self):
        """
        get the path of the saved model file, built from the
        common.model.directory and common.model.file config params

        Raises ValueError when no model file name is configured.
        """
        modelDirectory = self.config.getStringConfig("common.model.directory")[0]
        modelFile = self.config.getStringConfig("common.model.file")[0]
        if modelFile is None:
            raise ValueError("missing model file name")
        if modelDirectory is None:
            return modelFile
        return os.path.join(modelDirectory, modelFile)

    def train(self):
        """
        train model

        Returns a tuple (r squared score on the training data, intercept,
        coefficients). The model is saved when train.model.save is set.
        """
        # build model
        self.buildModel()

        # training data, loaded once and then kept on the instance
        if self.featData is None:
            (featData, outData) = self.prepData("train")
            (self.featData, self.outData) = (featData, outData)
        else:
            (featData, outData) = (self.featData, self.outData)

        # parameters
        modelSave = self.config.getBooleanConfig("train.model.save")[0]

        # train
        self.logger.info("...training model")
        self.regressor.fit(featData, outData)
        rsqScore = self.regressor.score(featData, outData)
        coef = self.regressor.coef_
        intc = self.regressor.intercept_
        result = (rsqScore, intc, coef)

        if modelSave:
            self.logger.info("...saving model")
            modelFilePath = self.getModelFilePath()
            joblib.dump(self.regressor, modelFilePath)
        return result

    def validate(self):
        """
        validate model against the validation data

        Returns a tuple (predicted output, r squared score).
        """
        # create model
        self.prepModel()

        # prepare test data
        (featData, outDataActual) = self.prepData("validate")

        # predict
        self.logger.info("...predicting")
        outDataPred = self.regressor.predict(featData)

        # error
        rsqScore = self.regressor.score(featData, outDataActual)
        result = (outDataPred, rsqScore)
        return result

    def predict(self):
        """
        predict using trained model

        Returns the predicted output for the data configured under predict.*
        """
        # create model
        self.prepModel()

        # prepare test data
        featData = self.prepData("predict")[0]

        # predict
        self.logger.info("...predicting")
        outData = self.regressor.predict(featData)
        return outData

    def prepData(self, mode):
        """
        loads and prepares data for training, validation and prediction

        Parameters
            mode : prefix of the config keys to read ("train", "validate" or
                "predict")

        Returns a tuple (feature data, output data); the output data is None
        in predict mode.
        """
        # parameters
        key = mode + ".data.file"
        dataFile = self.config.getStringConfig(key)[0]

        key = mode + ".data.fields"
        fieldIndices = self.config.getStringConfig(key)[0]
        if fieldIndices is not None:
            fieldIndices = strToIntArray(fieldIndices, ",")

        key = mode + ".data.feature.fields"
        featFieldIndices = self.config.getStringConfig(key)[0]
        if featFieldIndices is not None:
            featFieldIndices = strToIntArray(featFieldIndices, ",")

        # no output column is configured or read for prediction
        if mode != "predict":
            key = mode + ".data.out.field"
            outFieldIndex = self.config.getIntConfig(key)[0]

        # load data
        (data, featData) = loadDataFile(dataFile, ",", fieldIndices, featFieldIndices)
        if self.config.getStringConfig("common.preprocessing")[0] == "scale":
            featData = sk.preprocessing.scale(featData)

        outData = None
        if mode != "predict":
            outData = extrColumns(data, outFieldIndex)
        return (featData, outData)

    def prepModel(self):
        """
        load saved model or train model

        With predict.use.saved.model set, the saved model is loaded only when
        no model is held yet; a model that is already there is reused and not
        trained again. Without it the model is always trained.
        """
        useSavedModel = self.config.getBooleanConfig("predict.use.saved.model")[0]
        if not useSavedModel:
            # train model
            self.train()
        elif self.regressor is None:
            # load saved model
            self.logger.info("...loading saved model")
            modelFilePath = self.getModelFilePath()
            self.regressor = joblib.load(modelFilePath)
class LinearRegressor(BaseRegressor):
    """
    linear regression
    """

    def __init__(self, configFile):
        """
        initializer

        Parameters
            configFile : configuration file path
        """
        defValues = {}
        defValues["train.normalize"] = (False, None)
        super(LinearRegressor, self).__init__(configFile, defValues)

    def buildModel(self):
        """
        builds model object and keeps it in self.regressor
        """
        self.logger.info("...building linear regression model")
        normalize = self.config.getBooleanConfig("train.normalize")[0]

        # the normalize argument was deprecated in scikit-learn 1.0 and later
        # removed, so it is passed only when actually requested; leaving it out
        # is the same as normalize=False on the versions that still accept it
        params = {"normalize": True} if normalize else {}
        self.regressor = LinearRegression(**params)
class ElasticNetRegressor(BaseRegressor):
    """
    elastic net regression
    """

    def __init__(self, configFile):
        """
        initializer

        Parameters
            configFile : configuration file path
        """
        defValues = {}
        defValues["train.alpha"] = (1.0, None)
        defValues["train.loneratio"] = (0.5, None)
        defValues["train.normalize"] = (False, None)
        defValues["train.precompute"] = (False, None)
        defValues["train.max.iter"] = (1000, None)
        defValues["train.tol"] = (0.0001, None)
        defValues["train.random.state"] = (None, None)
        defValues["train.selection"] = ("cyclic", None)
        super(ElasticNetRegressor, self).__init__(configFile, defValues)

    def buildModel(self):
        """
        builds model object and keeps it in self.regressor
        """
        # imported here because the module level imports only bring in
        # LinearRegression from sklearn.linear_model
        from sklearn.linear_model import ElasticNet

        self.logger.info("...building elastic net regression model")
        alpha = self.config.getFloatConfig("train.alpha")[0]
        loneratio = self.config.getFloatConfig("train.loneratio")[0]
        normalize = self.config.getBooleanConfig("train.normalize")[0]
        precompute = self.config.getBooleanConfig("train.precompute")[0]
        maxIter = self.config.getIntConfig("train.max.iter")[0]
        tol = self.config.getFloatConfig("train.tol")[0]
        randState = self.config.getIntConfig("train.random.state")[0]
        # selection is a name such as "cyclic", so it is read as a string
        selection = self.config.getStringConfig("train.selection")[0]

        params = dict(alpha=alpha, l1_ratio=loneratio, precompute=precompute,
                      max_iter=maxIter, tol=tol, random_state=randState,
                      selection=selection)
        # the normalize argument was deprecated in scikit-learn 1.0 and later
        # removed, so it is passed only when actually requested; leaving it out
        # is the same as normalize=False on the versions that still accept it
        if normalize:
            params["normalize"] = True
        self.regressor = ElasticNet(**params)