#------------------
# Data Preparation
#------------------

# Read datasets.
# Download the data from http://www.saedsayad.com/datasets/CreditData.zip
# Both CSV files are read relative to the current working directory.
train <- read.csv("Credit_train.csv")
test <- read.csv("Credit_test.csv")

# Rows and columns
dim(train)
dim(test)

# Column names
colnames(train)
colnames(test)

# Show the first rows
head(train)
head(test)

#-----------------------------------------------------------------
# Data Exploration - Bivariate analysis - Numerical and Numerical
#-----------------------------------------------------------------

# Scatterplot matrices of BUSAGE, MAXLINEUTIL, DAYSDELQ, TOTACBAL
pairs(
  ~ BUSAGE + MAXLINEUTIL + DAYSDELQ + TOTACBAL,
  data = train,
  main = "Scatterplot - train",
  col = "darkgreen"
)
pairs(
  ~ BUSAGE + MAXLINEUTIL + DAYSDELQ + TOTACBAL,
  data = test,
  main = "Scatterplot - test",
  col = "brown"
)

# Drop every row of `train` that has an NA in any column. This is done once,
# up front, because cor()/cov()/var() return NA when given missing values
# under their default `use` setting.
train <- na.omit(train)

# Correlation between days delinquent and maximum line utilisation
cor(train$DAYSDELQ, train$MAXLINEUTIL)

# Covariance of the same pair, and the variance of DAYSDELQ
cov(train$DAYSDELQ, train$MAXLINEUTIL)
var(train$DAYSDELQ)

# Correlation and covariance matrices.
# cor()/cov() on a data frame need all-numeric columns, so DEFAULT and
# BUSTYPE are excluded first (presumably they are non-numeric - confirm
# against the data).
d <- train[, setdiff(colnames(train), c("DEFAULT", "BUSTYPE")), drop = FALSE]
cor(d)
cov(d)