#------------------
# Data Preparation
#------------------

# Read datasets.
# Download the data from http://www.saedsayad.com/datasets/BikeRental.zip
# and unzip it so both CSV files sit in the current working directory.
train <- read.csv("bike_rental_train.csv")
test <- read.csv("bike_rental_test.csv")

# Rows and cols
dim(train)
dim(test)

# Column names
colnames(train)
colnames(test)

# Show the first rows
head(train)
head(test)

# Scatter plot matrices of the weather variables against the rental count,
# one for each data set.
pairs(
  ~ temp + humidity + windspeed + bike_rent_count,
  data = train,
  main = "Scatterplot - train",
  col = "darkgreen"
)
pairs(
  ~ temp + humidity + windspeed + bike_rent_count,
  data = test,
  main = "Scatterplot - test",
  col = "brown"
)

#----------------------------
# Multiple Linear Regression
#----------------------------

# Train: regress bike_rent_count on every other column of `train`.
model.MLR <- lm(bike_rent_count ~ ., data = train)
summary(model.MLR)

# Residual plot: raw residuals against temperature, with a zero reference line.
res.MLR <- resid(model.MLR)
plot(
  train$temp, res.MLR,
  ylab = "Residuals",
  xlab = "Temperature",
  main = "Residual Plot"
)
abline(h = 0)

# Q-Q plot of the standardized residuals against the normal distribution.
stdres.MLR <- rstandard(model.MLR)
qqnorm(
  stdres.MLR,
  ylab = "Standardized Residuals",
  xlab = "Normal Scores",
  main = "QQ Plot"
)
qqline(stdres.MLR)

# Test: predict on the held-out set and compute the root mean squared error.
predictions <- predict(model.MLR, newdata = test)
errors <- test$bike_rent_count - predictions
rmse <- sqrt(mean(errors^2))
# Report the RMSE explicitly; it was previously computed but never shown.
print(rmse)

# Errors histogram (actual minus predicted)
hist(
  errors,
  main = "bike_rent_count",
  sub = "(Actual-Predicted)",
  xlab = "Error",
  breaks = 10,
  col = "darkred"
)