"""Model-fitting helpers for predicting smoothed ESI values.

Each ``my*(X, y)`` helper fits a regressor on ``(X, y)``, predicts back on the
training features, and returns ``(predictions, r2_score)``.  The tree-based
helpers additionally sweep ``max_depth`` on a held-out split, plot MSE vs.
depth to a PNG, and refit at the best depth.  ``myPlotScore`` draws a joint
plot of predicted vs. true ESI annotated with the R-squared value.
"""
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from lineartree import LinearTreeRegressor
from lineartree import LinearForestRegressor
from lineartree import LinearBoostRegressor
from matplotlib import pyplot as plt
import seaborn as sns


def _plot_depth_curve(tree_depths, mses, title, filename):
    """Plot mean squared error against tree depth and save to *filename*."""
    fig = plt.figure(figsize=(10, 6))
    plt.grid()
    plt.plot(tree_depths, mses, 'o-')
    plt.xlabel("Tree Depth")
    plt.ylabel("Mean Square Error")
    plt.title(title)
    fig.savefig(filename)


# Multi-layer Perceptron
def myNeuralNetwork(X, y):
    """Fit an MLP regressor on (X, y); return (train predictions, R^2)."""
    clf = MLPRegressor(random_state=1, max_iter=500)
    clf.fit(X, y)
    esipred = clf.predict(X)
    # BUG FIX: the original called clf.score(y, esipred); estimator.score()
    # expects (features, true targets), so that call was invalid.  Compute
    # the intended R^2 of predictions vs. truth directly.
    clf_score = r2_score(y, esipred)
    return esipred, clf_score


def myRandomForest(X, y):
    """Depth-tune a random forest, refit at the best depth, return preds + R^2.

    Sweeps max_depth in 1..23 on an 80/20 split, saves the MSE-vs-depth plot,
    then refits on all of (X, y) at the depth with the lowest held-out MSE.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)
    # Finding the relation between tree depth and MSE.
    # (The original also fit an unused, unrestricted forest here; that dead
    # work has been removed.)
    mses = []
    for depth in range(1, 24):
        d_tree_reg = RandomForestRegressor(max_depth=depth, random_state=0)
        d_tree_reg.fit(X_train, y_train)
        tree_predictions = d_tree_reg.predict(X_test)
        mses.append(mean_squared_error(y_test, tree_predictions))
    tree_depths = list(range(1, 24))
    _plot_depth_curve(tree_depths, mses,
                      "Random Forest Depth Testing",
                      "RandomForest_TreeDepthTesting.png")
    mses_min = min(mses)
    opt_depth = mses.index(mses_min) + 1
    print("optimal tree depth:", opt_depth, "with mse of", mses_min)
    # Refit at the optimal depth on the full data set.
    clf = RandomForestRegressor(max_depth=opt_depth, random_state=0)
    clf.fit(X, y)
    esipred = clf.predict(X)
    # BUG FIX: was clf.score(y, esipred) — invalid argument order/meaning.
    clf_score = r2_score(y, esipred)
    return esipred, clf_score


def myDecisionTree(X, y):
    """Depth-tune a decision tree, refit at the best depth, return preds + R^2.

    An unrestricted tree is fit first to discover the maximum achievable
    depth; each depth up to that bound is then evaluated on a held-out split.
    Also saves a rendering of the final tree to "decision_tree.png".
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)
    # Finding the relation between tree depth and MSE.
    probe = tree.DecisionTreeRegressor()
    probe.fit(X_train, y_train)
    max_depth = probe.tree_.max_depth
    print("Max tree depth =", max_depth)
    mses = []
    for depth in range(1, max_depth + 1):
        d_tree_reg = tree.DecisionTreeRegressor(max_depth=depth)
        d_tree_reg.fit(X_train, y_train)
        tree_predictions = d_tree_reg.predict(X_test)
        mses.append(mean_squared_error(y_test, tree_predictions))
    tree_depths = list(range(1, max_depth + 1))
    _plot_depth_curve(tree_depths, mses,
                      "Decision Tree DepthTesting",
                      "DecisionTree_TreeDepthTesting.png")
    mses_min = min(mses)
    opt_depth = mses.index(mses_min) + 1
    print("optimal tree depth:", opt_depth, "with mse of", mses_min)
    clf = tree.DecisionTreeRegressor(max_depth=opt_depth)
    clf.fit(X, y)
    esipred = clf.predict(X)
    # Draw the decision tree structure.
    fig = plt.figure(figsize=(25, 20))
    _ = tree.plot_tree(clf, filled=True)
    fig.savefig("decision_tree.png")
    # BUG FIX: was clf.score(y, esipred) — invalid argument order/meaning.
    clf_score = r2_score(y, esipred)
    return esipred, clf_score


def myLinearTree(X, y):
    """Fit a linear model tree on (X, y); return (train predictions, R^2)."""
    clf = LinearTreeRegressor(base_estimator=LinearRegression())
    clf.fit(X, y)
    esipred = clf.predict(X)
    # BUG FIX: was clf.score(y, esipred) — invalid argument order/meaning.
    clf_score = r2_score(y, esipred)
    return esipred, clf_score


def myLinearRandomForest(X, y):
    """Fit a linear forest on (X, y); return (train predictions, R^2)."""
    clf = LinearForestRegressor(base_estimator=LinearRegression())
    clf.fit(X, y)
    esipred = clf.predict(X)
    # BUG FIX: was clf.score(y, esipred) — invalid argument order/meaning.
    clf_score = r2_score(y, esipred)
    return esipred, clf_score


def mylinearboost(X, y):
    """Fit a linear boosting model on (X, y); return (train predictions, R^2)."""
    clf = LinearBoostRegressor(base_estimator=LinearRegression())
    clf.fit(X, y)
    esipred = clf.predict(X)
    # BUG FIX: was clf.score(y, esipred) — invalid argument order/meaning.
    clf_score = r2_score(y, esipred)
    return esipred, clf_score


def myPlotScore(compare, alg_str, score):
    """Save a joint regression plot of truth vs. prediction annotated with R^2.

    *compare* is expected to be a DataFrame with "ESI_Smoothed" (truth) and
    "ESIpredict" (prediction) columns; the figure is written to
    "<alg_str>_Jointmap.png".
    """
    # ROBUSTNESS FIX: modern seaborn no longer accepts positional data
    # vectors for jointplot — pass x/y/data as keywords instead.
    fig = sns.jointplot(x="ESI_Smoothed", y="ESIpredict",
                        data=compare, kind="reg")
    plt.xlabel("ESI_Smoothed", fontsize=15)
    plt.ylabel("ESIpredict_" + alg_str, fontsize=15)
    # Position the R^2 annotation near the horizontal centre, just above
    # the lowest predicted value.
    xmin = compare['ESI_Smoothed'].min()
    xmax = compare['ESI_Smoothed'].max()
    xpos = (xmax + xmin) / 2. + 0.2
    ymin = compare['ESIpredict'].min()
    ypos = ymin + 0.05
    # Adding the R-squared value to the plot.
    rsquare_str = 'R\u00b2 = ' + "{:.4f}".format(score)
    print(rsquare_str)
    plt.text(xpos, ypos, rsquare_str, fontsize=12)
    fig.savefig(alg_str + "_Jointmap.png")
    return