import pandas as pd
import csv
import json
from matplotlib import pyplot as plt
import numpy as np
import plotly.graph_objs as go
import plotly as plty
from sklearn import preprocessing
import sklearn.feature_extraction.text as tfidf
from sklearn.model_selection import train_test_split
from sklearn import datasets,linear_model, preprocessing,utils
from sklearn.metrics import mean_squared_error,r2_score
from sklearn import metrics
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils.multiclass import unique_labels
import itertools
from sklearn import svm
import collections
from sklearn.naive_bayes import MultinomialNB
# Plot an ROC curve. pred - predicted scores/probabilities, y - the true labels.
def plot_roc(pred, y):
    fpr, tpr, thresholds = roc_curve(y, pred)
    roc_auc = auc(fpr, tpr)
    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC)')
    plt.legend(loc="lower right")
    plt.show()
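# A hedged usage sketch: pass positive-class probabilities (not hard labels)
# so the curve spans more than one threshold, e.g. for some fitted classifier
# `clf` that exposes predict_proba:
# plot_roc(clf.predict_proba(x_test)[:, 1], y_test)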
from google.colab import drive
drive.mount('/content/drive')
df = pd.read_csv("drive/My Drive/foreveralone.csv")
df.head()
df.info()
df.describe()
df.duplicated().any()
df.isna().any()
df.job_title.value_counts()
df.info()
# change the friends column's dtype to int
df['friends'] = df['friends'].astype(np.int64)
df.info()
df.dropna(inplace=True)
df.isna().any()
# strip leading/trailing whitespace from job_title strings
df['job_title'] = df.job_title.str.strip()
# Function to normalize job_title values (scoped to the column so the
# replacements cannot touch other columns)
def replace_text(what, to):
    df['job_title'] = df['job_title'].replace(what, to)
replace_text('student', 'Student')
replace_text('none', 'None')
replace_text("N/a", 'None')
replace_text('na', 'None')
replace_text('-', 'None')
replace_text('.', 'None')
replace_text('*', 'None')
replace_text('ggg', 'None')
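# The same cleanup as one vectorized call (a sketch; the mapping simply
# mirrors the replace_text calls above):
# df['job_title'] = df['job_title'].replace({'student': 'Student', 'none': 'None',
#     'N/a': 'None', 'na': 'None', '-': 'None', '.': 'None', '*': 'None', 'ggg': 'None'})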
df.job_title.value_counts()
df.gender.value_counts()
plt.rcdefaults()  # matplotlib and numpy are already imported above
objects = ('Male', 'Female', 'Transgender Male', 'Transgender Female')
y_pos = np.arange(len(objects))
performance = df.gender.value_counts()
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('Count')
plt.title('Gender Frequency')
plt.show()
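# The chart above is repeated below for several columns; a reusable sketch
# that reads labels straight from value_counts(), avoiding the hand-typed
# label tuples (which must match the counts' descending order):
def plot_counts(series, title):
    counts = series.value_counts()
    pos = np.arange(len(counts))
    plt.bar(pos, counts.values, align='center', alpha=0.5)
    plt.xticks(pos, counts.index, rotation=45)
    plt.ylabel('Count')
    plt.title(title)
    plt.show()
# e.g. plot_counts(df.sexuallity, 'Sexuality Frequency')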
# sexuality frequency (the dataset spells the column "sexuallity")
df.sexuallity.value_counts()
objects = ('Straight', 'Bisexual','Gay/Lesbian')
y_pos = np.arange(len(objects))
performance = df.sexuallity.value_counts()
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('Count')
plt.title('Sexuality Frequency')
plt.show()
# body weight
df.bodyweight.value_counts()
objects = ('Normal weight', 'Overweight','Underweight','Obese')
y_pos = np.arange(len(objects))
performance = df.bodyweight.value_counts()
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('Count')
plt.title('Bodyweight Frequency')
plt.show()
# depressed
df.depressed.value_counts()
objects = ('Yes', 'No')
y_pos = np.arange(len(objects))
performance = df.depressed.value_counts()
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('Count')
plt.title('Depressed Frequency')
plt.show()
df.social_fear.value_counts()
objects = ('Yes', 'No')
y_pos = np.arange(len(objects))
performance = df.social_fear.value_counts()  # was df.depressed: the wrong column for this chart
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('Count')
plt.title('Social Fear Frequency')
plt.show()
df.attempt_suicide.value_counts()
objects = ('No', 'Yes')
y_pos = np.arange(len(objects))
performance = df.attempt_suicide.value_counts()
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('Count')
plt.title('Attempted Suicide Frequency')
plt.show()
df['age'].hist()
df['friends'].hist()
male = df[df['gender'] == 'Male' ]
female = df[df['gender'] == 'Female' ]
df.head()
df.dtypes
df['sexuallity'].unique()
df['gender'].unique()
df['race'].unique()
df['bodyweight'].unique()
df['virgin'].unique()
df['income'].unique()
df['prostitution_legal'].unique()
df['pay_for_sex'].unique()
df['depressed'].unique()
df['employment'].unique()
df['edu_level'].unique()
df['job_title'].unique()
df_new = df.copy()  # work on a copy so the cleaned frame is preserved
df_new.head()
df['what_help_from_others'].unique()
df_new = df_new.drop(columns=['time','what_help_from_others'])
df_new = df_new.drop(df_new.index[445])
df_new.head()
df_new.info()
location = df_new.loc[df_new['race'] == 'First two answers. Gender is androgyne, not male; sexuality is asexual, not bi.']
df_new = df_new.drop(location.index)
df_new.loc[df_new['race'] == 'First two answers. Gender is androgyne, not male; sexuality is asexual, not bi.']
df_new = df_new.drop(columns=['improve_yourself_how'])
df_for_sampling = df_new.copy(deep=True)  # Important: keep an unencoded copy for the SMOTENC section below
df_new.head()
# Label-encode only the target column y (output)
def encode_text_index(df_new, name):
    le = preprocessing.LabelEncoder()
    df_new[name] = le.fit_transform(df_new[name])
    return le.classes_
my_list = ['attempt_suicide']
for i in my_list:
    encode_text_index(df_new, i)
# Encode text values as dummy variables (one-hot encoding,
# i.e. [1,0,0],[0,1,0],[0,0,1] for red, green, blue)
def encode_text_dummy(df_new, name):
    dummies = pd.get_dummies(df_new[name])
    for x in dummies.columns:
        dummy_name = "{}-{}".format(name, x)
        df_new[dummy_name] = dummies[x]
    df_new.drop(name, axis=1, inplace=True)
mylist_2 = ["gender","age","sexuallity","income","race","bodyweight","virgin","prostitution_legal","pay_for_sex","social_fear","depressed","employment","job_title","edu_level"]
for t in mylist_2:
    print(t)
    encode_text_dummy(df_new, t)
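# Equivalent in a single call (a sketch; note pd.get_dummies with a columns
# list names the dummies "col_value" rather than the "col-value" used above):
# df_new = pd.get_dummies(df_new, columns=mylist_2)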
df_new.head()
Y_numpy_array = df_new['attempt_suicide'].values # Y is ready for model train test split
df_for_x = df_new.drop(columns=['attempt_suicide'])
X_numpy_array = df_for_x.values # X is ready for model train test split
Y_numpy_array[:2]
x_train,x_test,y_train,y_test = train_test_split(X_numpy_array,Y_numpy_array, test_size=0.3,random_state=42)
type(y_train)
print(utils.multiclass.type_of_target(y_train))
x_train.shape
x_test.shape
y_train.shape
y_test.shape
logreg = linear_model.LogisticRegression()
logreg.fit(x_train, y_train)
y_pred_logistic = logreg.predict(x_test)
print(y_pred_logistic[:5])
print(y_test[:5])
# The coefficients
#print('Coefficients: \n', logreg.coef_)
# The mean squared error
print("Mean squared error: %.2f"
% mean_squared_error(y_test, y_pred_logistic))
score = metrics.accuracy_score(y_test, y_pred_logistic)
print("Accuracy score: {}".format(score))
score_tf_precision_stopping = metrics.precision_score(y_test, y_pred_logistic, average= "weighted")
print("Precision score: {}".format(score_tf_precision_stopping))
score_tf_recall_stopping = metrics.recall_score(y_test, y_pred_logistic, average= "weighted")
print("Recall score: {}".format(score_tf_recall_stopping))
score_tf_f1_stopping = metrics.f1_score(y_test, y_pred_logistic, average= "weighted")
print("F1 score: {}".format(score_tf_f1_stopping))
len(y_test)
len(y_pred_logistic)
y_test[:5]
y_pred_logistic[:5]
y_test.shape
y_pred_logistic.shape
cf =confusion_matrix(y_test,y_pred_logistic)
plt.imshow(cf,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix without Normalization')
plt.xlabel('Predicted')
plt.ylabel('Actual')
tick_marks = np.arange(len(set(y_test))) # length of classes
class_labels = ['0','1']
tick_marks
plt.xticks(tick_marks,class_labels)
plt.yticks(tick_marks,class_labels)
# plotting text value inside cells
thresh = cf.max() / 2.
for i,j in itertools.product(range(cf.shape[0]),range(cf.shape[1])):
plt.text(j,i,format(cf[i,j],'d'),horizontalalignment='center',color='white' if cf[i,j] >thresh else 'black')
plt.show();
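# The confusion-matrix block above is repeated verbatim for every model
# below; a reusable sketch with the same styling:
def plot_cf(cf, class_labels=('0', '1')):
    plt.imshow(cf, cmap=plt.cm.Blues, interpolation='nearest')
    plt.colorbar()
    plt.title('Confusion Matrix without Normalization')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    ticks = np.arange(len(class_labels))
    plt.xticks(ticks, class_labels)
    plt.yticks(ticks, class_labels)
    thresh = cf.max() / 2.
    for i, j in itertools.product(range(cf.shape[0]), range(cf.shape[1])):
        plt.text(j, i, format(cf[i, j], 'd'), horizontalalignment='center',
                 color='white' if cf[i, j] > thresh else 'black')
    plt.show()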
df['attempt_suicide'].unique()
type(y_train)
print(utils.multiclass.type_of_target(y_train))
type(y_pred_logistic)
print(utils.multiclass.type_of_target(y_pred_logistic))
SVM_classifier = svm.SVC()
SVM_classifier.fit(x_train, y_train)
y_pred_SVM = SVM_classifier.predict(x_test)
# The coefficients
#print('Coefficients: \n', logreg.coef_)
# The mean squared error
print("Mean squared error: %.2f"
% mean_squared_error(y_test, y_pred_SVM))
# Explained variance score: 1 is perfect prediction
print('R2 score: %.2f' % r2_score(y_test, y_pred_SVM))
cf =confusion_matrix(y_test,y_pred_SVM)
plt.imshow(cf,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix without Normalization')
plt.xlabel('Predicted')
plt.ylabel('Actual')
tick_marks = np.arange(len(set(y_test))) # length of classes
class_labels = ['0','1']
tick_marks
plt.xticks(tick_marks,class_labels)
plt.yticks(tick_marks,class_labels)
# plotting text value inside cells
thresh = cf.max() / 2.
for i,j in itertools.product(range(cf.shape[0]),range(cf.shape[1])):
plt.text(j,i,format(cf[i,j],'d'),horizontalalignment='center',color='white' if cf[i,j] >thresh else 'black')
plt.show();
nb = MultinomialNB()
nb.fit(x_train,y_train)
y_nb_predict = nb.predict(x_test)
score = metrics.accuracy_score(y_test, y_nb_predict)
print("Accuracy score: {}".format(score))
score_tf_precision_stopping = metrics.precision_score(y_test, y_nb_predict, average= "weighted")
print("Precision score: {}".format(score_tf_precision_stopping))
score_tf_recall_stopping = metrics.recall_score(y_test, y_nb_predict, average= "weighted")
print("Recall score: {}".format(score_tf_recall_stopping))
score_tf_f1_stopping = metrics.f1_score(y_test, y_nb_predict, average= "weighted")
print("F1 score: {}".format(score_tf_f1_stopping))
cf1 =confusion_matrix(y_test, y_nb_predict)
plt.imshow(cf1,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix without Normalization')
plt.xlabel('Predicted')
plt.ylabel('Actual')
tick_marks = np.arange(len(set(y_test.flatten()))) # length of classes
class_labels = ['0','1']
tick_marks
plt.xticks(tick_marks,class_labels)
plt.yticks(tick_marks,class_labels)
# plotting text value inside cells
thresh = cf1.max() / 2.
for i,j in itertools.product(range(cf1.shape[0]),range(cf1.shape[1])):
plt.text(j,i,format(cf1[i,j],'d'),horizontalalignment='center',color='white' if cf1[i,j] >thresh else 'black')
plt.show();
#knn
from sklearn.neighbors import KNeighborsClassifier
knn_extra = KNeighborsClassifier(n_neighbors=10)
knn_extra.fit(x_train,y_train)
y_pred_knn_e = knn_extra.predict(x_test)
score = metrics.accuracy_score(y_test, y_pred_knn_e)
print("Accuracy score: {}".format(score))
score_tf_precision_stopping = metrics.precision_score(y_test, y_pred_knn_e, average= "weighted")
print("Precision score: {}".format(score_tf_precision_stopping))
score_tf_recall_stopping = metrics.recall_score(y_test, y_pred_knn_e, average= "weighted")
print("Recall score: {}".format(score_tf_recall_stopping))
score_tf_f1_stopping = metrics.f1_score(y_test,y_pred_knn_e, average= "weighted")
print("F1 score: {}".format(score_tf_f1_stopping))
cf1 =confusion_matrix(y_test,y_pred_knn_e)
plt.imshow(cf1,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix without Normalization')
plt.xlabel('Predicted')
plt.ylabel('Actual')
tick_marks = np.arange(len(set(y_test.flatten()))) # length of classes
class_labels = ['0','1']
tick_marks
plt.xticks(tick_marks,class_labels)
plt.yticks(tick_marks,class_labels)
# plotting text value inside cells
thresh = cf1.max() / 2.
for i,j in itertools.product(range(cf1.shape[0]),range(cf1.shape[1])):
plt.text(j,i,format(cf1[i,j],'d'),horizontalalignment='center',color='white' if cf1[i,j] >thresh else 'black')
plt.show();
import keras
from keras.models import Sequential
from keras import regularizers
from keras.layers.core import Dense, Activation
from keras.layers import Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint
num_classes = 2
#Convert class vectors to one hot format
y_train1 = keras.utils.to_categorical(y_train,num_classes)
y_test1 = keras.utils.to_categorical(y_test,num_classes)
print(x_train.shape)
print(y_test1.shape)
checkpointer2 = ModelCheckpoint(filepath="./best1.hdf5", verbose=0, save_best_only=True) # save best model
# Train 5 times from fresh random initializations; the checkpoint callback
# above keeps only the weights with the best validation loss across runs.
for i in range(5):
    print(i)
    model = Sequential()
    model.add(Dense(25, input_dim=x_train.shape[1], activation='relu')) # Hidden 1
    model.add(Dense(10, activation='relu')) # Hidden 2
    model.add(Dense(y_train1.shape[1], activation='softmax'))
    #adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=6, verbose=2, mode='auto')
    model.compile(loss="categorical_crossentropy", optimizer='adam')
    model.fit(x_train, y_train1, callbacks=[monitor, checkpointer2], validation_data=(x_test, y_test1), epochs=10)
model.load_weights('./best1.hdf5')
y_pred_c = model.predict(x_test)
y_pred_c = np.argmax(y_pred_c,axis=1)
y_test_c= np.argmax(y_test1,axis=1)
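# Optional sanity check (a sketch): the restored model's categorical
# cross-entropy on the test set via Keras' evaluate()
print("Test loss: %.4f" % model.evaluate(x_test, y_test1, verbose=0))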
score = metrics.accuracy_score(y_test_c, y_pred_c)
print("Accuracy score: {}".format(score))
score_tf_precision_stopping = metrics.precision_score(y_test_c, y_pred_c, average= "weighted")
print("Precision score: {}".format(score_tf_precision_stopping))
score_tf_recall_stopping = metrics.recall_score(y_test_c, y_pred_c, average= "weighted")
print("Recall score: {}".format(score_tf_recall_stopping))
score_tf_f1_stopping = metrics.f1_score(y_test_c, y_pred_c, average= "weighted")
print("F1 score: {}".format(score_tf_f1_stopping))
cf1 =confusion_matrix(y_test_c, y_pred_c)
plt.imshow(cf1,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix without Normalization')
plt.xlabel('Predicted')
plt.ylabel('Actual')
tick_marks = np.arange(len(set(y_test_c.flatten()))) # length of classes
class_labels = ['0','1']
tick_marks
plt.xticks(tick_marks,class_labels)
plt.yticks(tick_marks,class_labels)
# plotting text value inside cells
thresh = cf1.max() / 2.
for i,j in itertools.product(range(cf1.shape[0]),range(cf1.shape[1])):
plt.text(j,i,format(cf1[i,j],'d'),horizontalalignment='center',color='white' if cf1[i,j] >thresh else 'black')
plt.show();
y_df_oversampling = df_for_sampling['attempt_suicide']
y_df_oversampling_array = y_df_oversampling.values
y_df_oversampling_array
x_df_oversampling = df_for_sampling.drop(columns="attempt_suicide")
x_df_oversampling_array = x_df_oversampling.values
x_df_oversampling_array[:1]
y_df_oversampling
x_df_oversampling
x_train_over_sampling,x_test_over_sampling,y_train_over_sampling,y_test_oversampling = train_test_split(x_df_oversampling_array,y_df_oversampling_array, test_size=0.3,random_state=42)
x_train_over_sampling.shape
x_test_over_sampling.shape
y_train_over_sampling.shape
y_test_oversampling.shape
from imblearn.over_sampling import SMOTENC
smote_nc = SMOTENC(categorical_features=[0,1,2,3,4,5,6,7,8,10,11,12,13,14], random_state=0)
x_resampled, y_resampled = smote_nc.fit_resample(x_train_over_sampling, y_train_over_sampling)
print(sorted(collections.Counter(y_resampled).items()))
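# For comparison, the class distribution before resampling; SMOTENC should
# bring the counts printed above into balance:
print(sorted(collections.Counter(y_train_over_sampling).items()))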
y_column = ['attempt_suicide']
x_columns = ["gender","sexuallity","age","income","race","bodyweight","virgin","prostitution_legal","pay_for_sex","friends","social_fear","depressed","employment","job_title","edu_level"]
x_resampled_dataframe = pd.DataFrame(x_resampled, columns=x_columns)
x_resampled_dataframe.shape
x_resampled_dataframe[:1]
y_resampled_dataframe = pd.DataFrame(y_resampled, columns=y_column)
y_resampled_dataframe.shape
x_test_dataframe = pd.DataFrame(x_test_over_sampling, columns=x_columns)
x_test_dataframe.shape
y_test_dataframe = pd.DataFrame(y_test_oversampling, columns=y_column)
y_test_dataframe.shape
# Label-encode only the target column y (output)
def encode_text_index1(df_arg1, name):
    le = preprocessing.LabelEncoder()
    df_arg1[name] = le.fit_transform(df_arg1[name])
    return le.classes_
my_list = ['attempt_suicide']
for i in my_list:
    # fitting separate encoders is safe here only because both frames
    # contain both labels, so the 0/1 mapping comes out identical
    encode_text_index1(y_resampled_dataframe, i)
    encode_text_index1(y_test_dataframe, i)
def encode_text_dummy1(df_arg, name):
    # One-hot encode `name` in the resampled training frame and mirror the
    # same dummy columns onto x_test_dataframe so the two stay aligned.
    # (The original assigned train-derived dummies into the test frame,
    # which misaligns on the index; the test dummies are computed here
    # from the test frame itself.)
    dummies = pd.get_dummies(df_arg[name])
    test_dummies = pd.get_dummies(x_test_dataframe[name])
    for x in dummies.columns:
        dummy_name = "{}-{}".format(name, x)
        df_arg[dummy_name] = dummies[x]
        # a category can be absent from the test split; fill with 0 then
        x_test_dataframe[dummy_name] = test_dummies[x] if x in test_dummies.columns else 0
    df_arg.drop(name, axis=1, inplace=True)
    x_test_dataframe.drop(name, axis=1, inplace=True)
print(x_resampled_dataframe.shape)
print(x_test_dataframe.shape)
mylist_3 = ["gender","age","sexuallity","income","race","bodyweight","virgin","prostitution_legal","pay_for_sex","social_fear","depressed","employment","job_title","edu_level"]
for t in mylist_3:
    encode_text_dummy1(x_resampled_dataframe, t)
    # x_test_dataframe is encoded inside encode_text_dummy1, so no separate call is needed
print(x_resampled_dataframe.shape)
print(x_test_dataframe.shape)
final_x_train_resampled = x_resampled_dataframe.values
final_y_train_resampled = y_resampled_dataframe.values
final_x_test_resampled = x_test_dataframe.values
final_y_test_resampled = y_test_dataframe.values
final_y_train_resampled = final_y_train_resampled.flatten()
final_y_test_resampled = final_y_test_resampled.flatten()
logreg_resampled = linear_model.LogisticRegression()
final_y_train_resampled[:5]
logreg_resampled.fit(final_x_train_resampled, final_y_train_resampled.flatten())
y_pred_logistic_resampled = logreg_resampled.predict(final_x_test_resampled)
# The coefficients
#print('Coefficients: \n', logreg.coef_)
# The mean squared error
print("Mean squared error: %.2f"
% mean_squared_error(final_y_test_resampled, y_pred_logistic_resampled))
score = metrics.accuracy_score(final_y_test_resampled, y_pred_logistic_resampled)
print("Accuracy score: {}".format(score))
score_tf_precision_stopping = metrics.precision_score(final_y_test_resampled, y_pred_logistic_resampled, average= "weighted")
print("Precision score: {}".format(score_tf_precision_stopping))
score_tf_recall_stopping = metrics.recall_score(final_y_test_resampled, y_pred_logistic_resampled, average= "weighted")
print("Recall score: {}".format(score_tf_recall_stopping))
score_tf_f1_stopping = metrics.f1_score(final_y_test_resampled, y_pred_logistic_resampled, average= "weighted")
print("F1 score: {}".format(score_tf_f1_stopping))
cf1 =confusion_matrix(final_y_test_resampled,y_pred_logistic_resampled)
plt.imshow(cf1,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix without Normalization')
plt.xlabel('Predicted')
plt.ylabel('Actual')
tick_marks = np.arange(len(set(final_y_test_resampled.flatten()))) # length of classes
class_labels = ['0','1']
tick_marks
plt.xticks(tick_marks,class_labels)
plt.yticks(tick_marks,class_labels)
# plotting text value inside cells
thresh = cf1.max() / 2.
for i,j in itertools.product(range(cf1.shape[0]),range(cf1.shape[1])):
plt.text(j,i,format(cf1[i,j],'d'),horizontalalignment='center',color='white' if cf1[i,j] >thresh else 'black')
plt.show();
nb = MultinomialNB()
nb.fit(final_x_train_resampled,final_y_train_resampled)
y_nb_predict = nb.predict(final_x_test_resampled)
score = metrics.accuracy_score(final_y_test_resampled, y_nb_predict)
print("Accuracy score: {}".format(score))
score_tf_precision_stopping = metrics.precision_score(final_y_test_resampled, y_nb_predict, average= "weighted")
print("Precision score: {}".format(score_tf_precision_stopping))
score_tf_recall_stopping = metrics.recall_score(final_y_test_resampled, y_nb_predict, average= "weighted")
print("Recall score: {}".format(score_tf_recall_stopping))
score_tf_f1_stopping = metrics.f1_score(final_y_test_resampled,y_nb_predict, average= "weighted")
print("F1 score: {}".format(score_tf_f1_stopping))
cf1 =confusion_matrix(final_y_test_resampled,y_nb_predict)
plt.imshow(cf1,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix without Normalization')
plt.xlabel('Predicted')
plt.ylabel('Actual')
tick_marks = np.arange(len(set(final_y_test_resampled.flatten()))) # length of classes
class_labels = ['0','1']
tick_marks
plt.xticks(tick_marks,class_labels)
plt.yticks(tick_marks,class_labels)
# plotting text value inside cells
thresh = cf1.max() / 2.
for i,j in itertools.product(range(cf1.shape[0]),range(cf1.shape[1])):
plt.text(j,i,format(cf1[i,j],'d'),horizontalalignment='center',color='white' if cf1[i,j] >thresh else 'black')
plt.show();
#knn
from sklearn.neighbors import KNeighborsClassifier
knn_extra = KNeighborsClassifier(n_neighbors=10)
knn_extra.fit(final_x_train_resampled,final_y_train_resampled)
y_pred_knn_e = knn_extra.predict(final_x_test_resampled)
score = metrics.accuracy_score(final_y_test_resampled, y_pred_knn_e)
print("Accuracy score: {}".format(score))
score_tf_precision_stopping = metrics.precision_score(final_y_test_resampled, y_pred_knn_e, average= "weighted")
print("Precision score: {}".format(score_tf_precision_stopping))
score_tf_recall_stopping = metrics.recall_score(final_y_test_resampled, y_pred_knn_e, average= "weighted")
print("Recall score: {}".format(score_tf_recall_stopping))
score_tf_f1_stopping = metrics.f1_score(final_y_test_resampled,y_pred_knn_e, average= "weighted")
print("F1 score: {}".format(score_tf_f1_stopping))
cf1 =confusion_matrix(final_y_test_resampled,y_pred_knn_e)
plt.imshow(cf1,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix without Normalization')
plt.xlabel('Predicted')
plt.ylabel('Actual')
tick_marks = np.arange(len(set(final_y_test_resampled.flatten()))) # length of classes
class_labels = ['0','1']
tick_marks
plt.xticks(tick_marks,class_labels)
plt.yticks(tick_marks,class_labels)
# plotting text value inside cells
thresh = cf1.max() / 2.
for i,j in itertools.product(range(cf1.shape[0]),range(cf1.shape[1])):
plt.text(j,i,format(cf1[i,j],'d'),horizontalalignment='center',color='white' if cf1[i,j] >thresh else 'black')
plt.show();
import keras
from keras.models import Sequential
from keras import regularizers
from keras.layers.core import Dense, Activation
from keras.layers import Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint
num_classes = 2
#Convert class vectors to one hot format
final_y_train1_resampled = keras.utils.to_categorical(final_y_train_resampled,num_classes)
final_y_test1_resampled = keras.utils.to_categorical(final_y_test_resampled,num_classes)
print(final_x_train_resampled.shape)
print(final_y_test1_resampled.shape)
checkpointer2 = ModelCheckpoint(filepath="./best1.hdf5", verbose=0, save_best_only=True) # save best model
# As before: train 5 times from fresh initializations and keep the
# checkpointed best weights.
for i in range(5):
    print(i)
    model = Sequential()
    model.add(Dense(25, input_dim=final_x_train_resampled.shape[1], activation='relu')) # Hidden 1
    model.add(Dense(10, activation='relu')) # Hidden 2
    model.add(Dense(final_y_train1_resampled.shape[1], activation='softmax'))
    #adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=6, verbose=2, mode='auto')
    model.compile(loss="categorical_crossentropy", optimizer='adam')
    model.fit(final_x_train_resampled, final_y_train1_resampled, callbacks=[monitor, checkpointer2], validation_data=(final_x_test_resampled, final_y_test1_resampled), epochs=10)
model.load_weights('./best1.hdf5')
y_pred_c = model.predict(final_x_test_resampled)
y_pred_c = np.argmax(y_pred_c,axis=1)
y_test_c= np.argmax(final_y_test1_resampled,axis=1)
score = metrics.accuracy_score(y_test_c, y_pred_c)
print("Accuracy score: {}".format(score))
score_tf_precision_stopping = metrics.precision_score(y_test_c, y_pred_c, average= "weighted")
print("Precision score: {}".format(score_tf_precision_stopping))
score_tf_recall_stopping = metrics.recall_score(y_test_c, y_pred_c, average= "weighted")
print("Recall score: {}".format(score_tf_recall_stopping))
score_tf_f1_stopping = metrics.f1_score(y_test_c, y_pred_c, average= "weighted")
print("F1 score: {}".format(score_tf_f1_stopping))
cf1 =confusion_matrix(y_test_c, y_pred_c)
plt.imshow(cf1,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix without Normalization')
plt.xlabel('Predicted')
plt.ylabel('Actual')
tick_marks = np.arange(len(set(y_test_c.flatten()))) # length of classes
class_labels = ['0','1']
tick_marks
plt.xticks(tick_marks,class_labels)
plt.yticks(tick_marks,class_labels)
# plotting text value inside cells
thresh = cf1.max() / 2.
for i,j in itertools.product(range(cf1.shape[0]),range(cf1.shape[1])):
plt.text(j,i,format(cf1[i,j],'d'),horizontalalignment='center',color='white' if cf1[i,j] >thresh else 'black')
plt.show();
# References:
# https://www.kaggle.com/kingburrito666/the-demographic-rforeveralone-dataset/home
# https://imbalanced-learn.readthedocs.io/en/stable/generated/imblearn.over_sampling.SMOTENC.html
# https://scikit-learn.org/stable/
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html