import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
df = pd.read_excel('data720.xlsx')
df.head()
carb | hydr | nitro | solid | gas | fixed | caloric | |
---|---|---|---|---|---|---|---|
0 | 53.5 | 5.8 | 2.00 | 7.5 | 72.6 | 19.9 | 24.48 |
1 | 54.4 | 5.9 | 1.10 | 4.2 | 77.4 | 18.4 | 24.60 |
2 | 53.3 | 5.6 | 1.43 | 6.3 | 68.1 | 25.6 | 21.22 |
3 | 52.9 | 5.4 | 1.74 | 4.6 | 67.5 | 27.9 | 20.87 |
4 | 49.9 | 5.3 | 0.80 | 3.4 | 72.0 | 24.6 | 19.57 |
correlations = df.corr(method='pearson')
print(correlations)
carb hydr nitro solid gas fixed caloric carb 1.000000 0.613948 -0.097076 -0.868716 0.720015 0.487921 0.905313 hydr 0.613948 1.000000 -0.007732 -0.589411 0.597493 0.122170 0.667561 nitro -0.097076 -0.007732 1.000000 0.148930 -0.132745 -0.067657 -0.006727 solid -0.868716 -0.589411 0.148930 1.000000 -0.879624 -0.466989 -0.783878 gas 0.720015 0.597493 -0.132745 -0.879624 1.000000 -0.006811 0.659627 fixed 0.487921 0.122170 -0.067657 -0.466989 -0.006811 1.000000 0.421342 caloric 0.905313 0.667561 -0.006727 -0.783878 0.659627 0.421342 1.000000
carb = df.carb
hydr = df.hydr
nitro = df.nitro
solid = df.solid
gas = df.gas
fixed = df.fixed
caloric = df.caloric
CHN = np.column_stack((carb, hydr, nitro))
SGF = np.column_stack((solid, gas, fixed))
data_1 = np.column_stack((CHN,))
data_2 = np.column_stack((SGF,))
data_3 = np.column_stack((CHN,SGF))
data_4 = np.column_stack((carb, hydr, solid, fixed))
x = pd.DataFrame(data_4, )
y = pd.DataFrame(caloric)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=42)
print(f'input data size: {len(x)}')
print(f'train data size: {len(y_train)}')
print(f'test data size: {len(X_test)}')
input data size: 720 train data size: 540 test data size: 180
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
ss = StandardScaler()
#ss = MinMaxScaler(feature_range= (-1, 1))
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)
x = np.concatenate((X_train,X_test))
y_train = ss.fit_transform(y_train)
y_test = ss.transform(y_test)
y = np.concatenate((y_train,y_test))
regr = MLPRegressor(random_state=42, alpha=0.0001, max_iter=700).fit(X_train, y_train.ravel())
wow = regr.predict(X_test)
y_test_inv=ss.inverse_transform(y_test)
wow_inv=ss.inverse_transform(wow)
plt.plot(y_test_inv.ravel(), label="Real")
plt.plot(wow_inv, label="Prediction")
plt.legend()
plt.show()
print(f"MSE = {mean_squared_error(wow_inv, y_test_inv)}")
print(f"R21 = {r2_score(wow, y_test)}")
MSE = 0.7646528987695613 R21 = 0.9012383719227771
plt.scatter(y_test_inv.ravel(), wow_inv, label=f"R21 = {r2_score(wow, y_test)}")
plt.plot(y_test_inv.ravel(), y_test_inv.ravel(),color='red')
plt.legend()
plt.show()
V1 = []
V2 = []
n1 = []
for i in range(1, 100):
j=i*5
regr = MLPRegressor(hidden_layer_sizes=(j,), random_state=42, max_iter=700).fit(X_train, y_train.ravel())
wow1 = regr.predict(X_test)
wow2 = regr.predict(X_train)
y1=y_test.ravel()
y2=y_train.ravel()
mae1 = mean_squared_error(wow1, y1)
mae2 = mean_squared_error(wow2, y2)
###################################
n1.append(j)
V1.append(mae1)
V2.append(mae2)
plt.scatter(n1, V1, label = "1D_ANN, hidden_layer_sizes=(j,), test prediction")
plt.scatter(n1, V2, label = "1D_ANN, hidden_layer_sizes=(j,), train prediction")
plt.legend()
plt.show()
regr = MLPRegressor(random_state=42,
max_iter=700,
hidden_layer_sizes= (100,),
activation='relu',
solver='adam',
alpha=0.00001,
batch_size=200,
learning_rate_init=0.001,
shuffle=True,
warm_start=False,
early_stopping=False,
beta_1=0.58,
beta_2=0.96,
epsilon=1e-08
).fit(X_train, y_train.ravel())
wow = regr.predict(X_test)
y_test_inv=ss.inverse_transform(y_test)
wow_inv=ss.inverse_transform(wow)
plt.plot(y_test_inv.ravel(), label="Real")
plt.plot(wow_inv, label="Prediction")
plt.legend()
plt.show()
print(f"MSE = {mean_squared_error(wow_inv, y_test_inv)}")
print(f"R22 = {r2_score(wow, y_test)}")
MSE = 0.7606824991520758 R22 = 0.9023037713556807
plt.scatter(y_test_inv.ravel(), wow_inv, label=f"R22 = {r2_score(wow, y_test)}")
plt.plot(y_test_inv.ravel(), y_test_inv.ravel(),color='red')
plt.legend()
plt.show()
your_data = np.column_stack((y_test_inv.ravel(), wow_inv))
your_df = pd.DataFrame(your_data)
your_df.to_csv(r'D:\ANN_prediction.csv')
V3 = []
n2 = []
for i in range(0, 52):
j=i*10
regr = MLPRegressor(random_state=42,
max_iter=700,
hidden_layer_sizes= (100,),
activation='relu',
solver='adam',
alpha=0.00001,
#batch_size=200,
learning_rate_init=0.001,
shuffle=True,
warm_start=False,
early_stopping=False,
beta_1=0.58,
beta_2=0.96,
epsilon=1e-08).fit(X_train[0:len(X_train)-j], y_train.ravel()[0:len(X_train)-j])
wow = regr.predict(X_test)
R2 = r2_score(wow, y_test)
###################################
n2.append((len(X_train)-j)+len(X_test))
V3.append(R2)
plt.scatter(n2, V3, label = "number of samples")
plt.legend()
plt.show()
HHV3=0.4373*carb-1.6701
print(f"R23 = {r2_score(caloric, HHV3)}")
R23 = 0.7966337572090881
plt.scatter(caloric, HHV3, label="Eq.3, R^2= 0.797")
plt.plot(caloric, caloric, color='red')
plt.legend()
plt.show()
HHV7=0.00355*carb*carb-0.232*carb-2.230*hydr+0.0512*carb*hydr+0.131*nitro+20.600
print(f"R24 = {r2_score(caloric, HHV7)}")
R24 = 0.7298327644906875
plt.scatter(caloric, HHV7, label="Eq.7, R^2= 0.730")
plt.plot(caloric, caloric, color='red')
plt.legend()
plt.show()
from sklearn.cross_decomposition import PLSRegression
pls = PLSRegression(n_components=4, max_iter=1000, tol=1e-06)
pls.fit(X_train,y_train)
pls_pred = pls.predict(X_test)
y_test_inv=ss.inverse_transform(y_test)
pls_pred_inv=ss.inverse_transform(pls_pred)
plt.plot(y_test_inv.ravel(), label="Real")
plt.plot(pls_pred_inv, label="Prediction")
plt.legend()
plt.show()
print(f"MSE = {mean_squared_error(pls_pred_inv, y_test_inv)}")
print(f"R25 = {r2_score(pls_pred, y_test)}")
MSE = 0.933088110064856 R25 = 0.8691388077553671
plt.scatter(y_test_inv.ravel(), pls_pred_inv, label="PLS, R^2=0.869")
plt.plot(y_test_inv.ravel(), y_test_inv.ravel(),color='red')
plt.legend()
plt.show()
from sklearn.model_selection import GridSearchCV
mlp = MLPRegressor(max_iter=70000)
param_list = {"hidden_layer_sizes": [(10,),(50,), (100,), (300,)],
"activation": ["identity", "logistic", "tanh", "relu"],
"solver": ["sgd", "adam"],
"alpha": [0.00005,0.0005]}
n_folds = 5
gridCV = GridSearchCV(estimator=mlp, param_grid=param_list, cv=n_folds)
gridCV.fit(X_train, y_train.ravel())
gridCV.best_params_
{'activation': 'relu', 'alpha': 5e-05, 'hidden_layer_sizes': (50,), 'solver': 'adam'}
from sklearn.model_selection import RandomizedSearchCV
mlp = MLPRegressor(max_iter=70000)
a = np.arange(7, dtype=float)
b = np.ones(7, dtype=float)
alpha1 = (10*b)**(-1*a)
param_list = {"hidden_layer_sizes": list(range(1, 500)) + [None],
"activation": ["identity", "logistic", "tanh", "relu"],
"solver": ["sgd", "adam"],
"alpha": alpha1}
n_folds = 15
RandomCV = RandomizedSearchCV(estimator=mlp, param_distributions=param_list, cv=n_folds)
RandomCV.fit(X_train, y_train.ravel())
RandomCV.best_params_
{'solver': 'adam', 'hidden_layer_sizes': 184, 'alpha': 0.1, 'activation': 'relu'}