# Data ----
# Read the Universal Bank data set; the first row of the file holds the
# column names. Plain ASCII double quotes are required here: typographic
# ("curly") quotes are not string delimiters in R and stop the script from
# parsing.
Universal_bank <- read.csv("UniversalBank.csv", header = TRUE)
# Quick look at the size and the first rows of the raw data
dim(Universal_bank)
head(Universal_bank)
# Selecting the variables: Online, CreditCard and Personal Loan
# (picked by position 13, 14 and 10, so after this step they are
# columns 1, 2 and 3 respectively)
Universal_bank <- Universal_bank[, c(13, 14, 10)]
Universal_bank
# Missing-value check: total number of NA cells in Universal_bank
sum(is.na(Universal_bank))
# Fix the random seed so the partition below can be reproduced
set.seed(12345)
# Partition the rows: 60% of the row indices are drawn at random (and kept in
# ascending order) for training; every row not drawn goes to validation
dt <- sort(sample(nrow(Universal_bank), 0.6 * nrow(Universal_bank)))
Universal_bank_train <- Universal_bank[dt, ]
Universal_bank_valid <- Universal_bank[-dt, ]
# Show both partitions, training first
Universal_bank_train
Universal_bank_valid
# Q1. Pivot table
# Counts of training records with column 2 (CreditCard) and column 3 (loan
# outcome) as row variables and column 1 (Online) as the column variable.
ftable(Universal_bank_train, row.vars = c(2, 3), col.vars = 1)
# Q2.
# The same counts in long format: one row per combination of
# Online (column 1), CreditCard (column 2) and loan outcome (column 3),
# with the number of matching records in Freq.
df <- as.data.frame(
  ftable(Universal_bank_train, row.vars = c(1, 2), col.vars = 3)
)
View(df)
# Probability that a customer who owns a bank credit card and is actively
# using online banking services will accept the loan offer, i.e.
# P(Loan = 1 | CC = 1, Online = 1).
# The denominator is the number of customers with CC = 1 and Online = 1.
# Dividing by sum(df$Freq) (all training records) would give the joint
# probability P(Loan = 1, CC = 1, Online = 1) instead of the conditional one.
# The counts are taken from df rather than typed in by hand, so the result
# stays correct if the sample changes.
online_and_cc <- df[[1]] == "1" & df[[2]] == "1"
sum(df$Freq[online_and_cc & df[[3]] == "1"]) / sum(df$Freq[online_and_cc])
# Q3.
# Pivot table with the loan outcome (column 3) as rows and Online (column 1)
# as columns. df1 first holds the flat table so it can be printed, and is
# then replaced by its long-format data frame for viewing.
df1 <- ftable(Universal_bank_train, row.vars = 3, col.vars = 1)
df1
df1 <- as.data.frame(df1)
View(df1)
# Pivot table with the loan outcome (column 3) as rows and CreditCard
# (column 2) as columns, handled the same way.
df2 <- ftable(Universal_bank_train, row.vars = 3, col.vars = 2)
df2
df2 <- as.data.frame(df2)
View(df2)
# Computing the probabilities
# Every quantity is estimated directly from the training partition instead of
# from counts copied out of the pivot tables by hand, so the results follow
# the data if the seed, the split or the file changes.
# Columns by position: 1 = Online, 2 = CreditCard, 3 = loan outcome; the
# comparisons below assume the 0/1 coding used throughout this script.
uses_online <- Universal_bank_train[[1]] == 1
has_cc <- Universal_bank_train[[2]] == 1
loan_accepted <- Universal_bank_train[[3]] == 1

# 1. P(CC = 1 | Loan = 1)
p_cc_given_loan1 <- mean(has_cc[loan_accepted])
p_cc_given_loan1
# 2. P(Online = 1 | Loan = 1)
p_online_given_loan1 <- mean(uses_online[loan_accepted])
p_online_given_loan1
# 3. P(Loan = 1) = the proportion of loan acceptors
p_loan1 <- mean(loan_accepted)
p_loan1
# 4. P(CC = 1 | Loan = 0)
p_cc_given_loan0 <- mean(has_cc[!loan_accepted])
p_cc_given_loan0
# 5. P(Online = 1 | Loan = 0)
p_online_given_loan0 <- mean(uses_online[!loan_accepted])
p_online_given_loan0
# 6. P(Loan = 0)
p_loan0 <- mean(!loan_accepted)
p_loan0

# Q4. The naive Bayes probability P(Loan = 1 | CC = 1, Online = 1)
# Numerator: class-conditional probabilities times the prior for Loan = 1.
# Denominator: the same product summed over both loan outcomes.
nb_score_loan1 <- p_cc_given_loan1 * p_online_given_loan1 * p_loan1
nb_score_loan0 <- p_cc_given_loan0 * p_online_given_loan0 * p_loan0
nb_score_loan1 / (nb_score_loan1 + nb_score_loan0)
# Q 5.
# In Q4 we obtained 10.06% and in Q2 1.5%. The difference between the exact method and the naive Bayes method is that the exact method needs records with exactly the same combination of predictor values in order to make a prediction, whereas the naive Bayes method does not. Therefore, the exact method is more accurate.
# Q 6.
# Plain ASCII quotes are required; typographic quotes are a parse error in R.
library("e1071")
# Naive Bayes on the training data
# The three variables are stored as numeric 0/1 columns. naiveBayes() models
# numeric predictors with a Gaussian distribution (it reports a mean and a
# standard deviation per class), so the columns are converted to factors in a
# local copy to get categorical conditional-probability tables that can be
# compared with the hand calculation.
nb_train <- Universal_bank_train
nb_train[] <- lapply(nb_train, factor)
# NOTE(review): the formula assumes the loan column is named PersonalLoan in
# the CSV header - confirm against names(Universal_bank_train).
naivebayes <- naiveBayes(PersonalLoan ~ ., data = nb_train)
naivebayes
# The naive Bayes model gives the same output as the previous method: (.457 * .492 * .098) / (.457 * .492 * .098 + .451 * .491 * .902) = 10.06688 (percent), which is the same answer as obtained above.