# Bike rental demand: regression tree with rpart.
#
# Reads the train/test CSV files from the working directory, prints a quick
# overview of both, fits an rpart regression tree for bike_rent_count on the
# training set, draws residual diagnostics, and scores the tree on the test
# set (RMSE plus a histogram of the prediction errors).

#------------------
# Setup
#------------------

# Loaded up front so that a missing package fails before any data is read
# or any plot is drawn.
library(rpart)
library(rpart.plot)

#------------------
# Data Preparation
#------------------

# Read datasets
# Download the data from http://www.saedsayad.com/datasets/BikeRental.zip
train <- read.csv("bike_rental_train.csv")
test <- read.csv("bike_rental_test.csv")

# Rows and columns
dim(train)
dim(test)

# Column names
colnames(train)
colnames(test)

# First rows
head(train)
head(test)

# Scatter plot matrices of the three numeric predictors against the target
pairs(
  ~ temp + humidity + windspeed + bike_rent_count,
  data = train,
  main = "Scatterplot - train",
  col = "darkgreen"
)
pairs(
  ~ temp + humidity + windspeed + bike_rent_count,
  data = test,
  main = "Scatterplot - test",
  col = "brown"
)

#----------------------------
# Decision Tree - Regression
#----------------------------

# Train: method = "anova" requests a regression tree; the formula uses every
# other column of `train` as a predictor.
model.Dtree <- rpart(bike_rent_count ~ ., data = train, method = "anova")
prp(model.Dtree)

# Residual plot: training residuals against temperature, with a horizontal
# reference line at zero (intercept 0, slope 0).
res.Dtree <- resid(model.Dtree)
plot(
  train$temp, res.Dtree,
  ylab = "Residuals",
  xlab = "Temperature",
  main = "Residual Plot"
)
abline(0, 0)

# Q-Q plot: scale() centres the residuals and divides by their standard
# deviation before they are compared with normal quantiles.
stdres.Dtree <- scale(res.Dtree)
qqnorm(
  stdres.Dtree,
  ylab = "Standardized Residuals",
  xlab = "Normal Scores",
  main = "QQ Plot"
)
qqline(stdres.Dtree)

# Test: predict on the test set and compute the errors as actual - predicted.
pred.Dtree <- predict(model.Dtree, newdata = test)
err.Dtree <- test$bike_rent_count - pred.Dtree
rmse.Dtree <- sqrt(mean((err.Dtree^2)))

# Report the test RMSE explicitly; a bare assignment prints nothing.
print(rmse.Dtree)

# Errors histogram
hist(
  err.Dtree,
  main = "bike_rent_count",
  sub = "(Actual-Predicted)",
  xlab = "Error",
  breaks = 10,
  col = "darkred"
)