# R Programming Assignment Help

# R Programming Task on Naive Bayes Classifier

# Data ----

# Read the Universal Bank customer file; the first row of the CSV holds the
# column names. (The file name must be in plain ASCII double quotes:
# typographic quotes are not valid R syntax.)
Universal_bank <- read.csv("UniversalBank.csv", header = TRUE)
dim(Universal_bank)
head(Universal_bank)

# Selecting the variables: Online, CreditCard and Personal Loan.
# They are columns 13, 14 and 10 of the file and are kept in that order, so
# the rest of the script can refer to them by position 1, 2 and 3.
Universal_bank <- Universal_bank[, c(13, 14, 10)]
Universal_bank

# Fix the random seed so that the random split below is reproducible.
set.seed(12345)

# Count the missing values in the data.
sum(is.na(Universal_bank))

# Split the data into training and validation sets: draw 60% of the row
# numbers at random for training and send every remaining row to validation.
# Sorting the drawn row numbers keeps the training rows in their original
# order.
n_rows <- nrow(Universal_bank)
dt <- sample(n_rows, size = 0.6 * n_rows)
dt <- sort(dt)

Universal_bank_train <- Universal_bank[dt, ]
Universal_bank_valid <- Universal_bank[-dt, ]

Universal_bank_train
Universal_bank_valid
# Q1. Pivot table
# Counts of training customers with CreditCard and PersonalLoan as the row
# variables and Online as the column variable.
ftable(Universal_bank_train, row.vars = c(2, 3), col.vars = 1)

# Q2.
# The same counts in long form: one row per combination of
# (Online, CreditCard, PersonalLoan) -- columns 1, 2 and 3 -- with the number
# of customers in Freq.
df <- ftable(Universal_bank_train, row.vars = c(1, 2), col.vars = 3)
df <- as.data.frame(df)
if (interactive()) {
  View(df) # spreadsheet viewer; skipped when run non-interactively
}

# Probability that a customer who owns a bank credit card and is actively
# using online banking services will accept the loan offer:
#   P(Loan = 1 | CC = 1, Online = 1)
#     = n(Loan = 1, CC = 1, Online = 1) / n(CC = 1, Online = 1)
# The original script computed 45 / sum(df$Freq) by reading 45 off the table.
# That divides by ALL training customers, so it is the joint probability
# P(Loan = 1, CC = 1, Online = 1), not the conditional probability asked for.
cc_and_online <- df[[1]] == "1" & df[[2]] == "1"
p_exact <- sum(df$Freq[cc_and_online & df[[3]] == "1"]) /
  sum(df$Freq[cc_and_online])
p_exact

# Q3.
# Pivot table with row as PersonalLoan and column as Online
df1 <- ftable(Universal_bank_train, row.vars = 3, col.vars = 1)
df1
df1 <- as.data.frame(df1)
if (interactive()) {
  View(df1)
}

# Pivot table with row as PersonalLoan and column as CreditCard
df2 <- ftable(Universal_bank_train, row.vars = 3, col.vars = 2)
df2
df2 <- as.data.frame(df2)
if (interactive()) {
  View(df2)
}

# Among the customers whose loan status (column 1 of `counts`) equals `loan`,
# return the share for whom the other variable (column 2) equals 1.
# `counts` is a long-form table as produced by as.data.frame(ftable(...)),
# i.e. two factor columns followed by a Freq column.
share_of_ones <- function(counts, loan) {
  in_class <- counts[[1]] == loan
  sum(counts$Freq[in_class & counts[[2]] == "1"]) / sum(counts$Freq[in_class])
}

# Computing the probabilities
# The values are now derived from the tables instead of being typed in by
# hand. The figures in parentheses are the counts from the original author's
# run, kept for reference; a different R version or data file can produce a
# different split and therefore different counts.

# 1. P(CC = 1 | Loan = 1)        (original run: 87 / (207 + 87))
p_cc_loan1 <- share_of_ones(df2, "1")
p_cc_loan1

# 2. P(Online = 1 | Loan = 1)    (original run: 173 / (121 + 173))
p_online_loan1 <- share_of_ones(df1, "1")
p_online_loan1

# 3. P(Loan = 1) = the proportion of loan acceptors
#                                (original run: 294 / (294 + 2706))
p_loan1 <- sum(df1$Freq[df1[[1]] == "1"]) / sum(df1$Freq)
p_loan1

# 4. P(CC = 1 | Loan = 0)        (original run: 773 / (1933 + 773))
p_cc_loan0 <- share_of_ones(df2, "0")
p_cc_loan0

# 5. P(Online = 1 | Loan = 0)    (original run: 1601 / (1105 + 1601))
p_online_loan0 <- share_of_ones(df1, "0")
p_online_loan0

# 6. P(Loan = 0)                 (original run: 2706 / (2706 + 294))
p_loan0 <- sum(df1$Freq[df1[[1]] == "0"]) / sum(df1$Freq)
p_loan0

# Q 4. The naive Bayes probability P(Loan = 1 | CC = 1, Online = 1)
# Bayes' rule with the two predictors treated as independent within each
# loan class:
#   P(CC=1|L=1) P(On=1|L=1) P(L=1)
#   ---------------------------------------------------------------
#   P(CC=1|L=1) P(On=1|L=1) P(L=1) + P(CC=1|L=0) P(On=1|L=0) P(L=0)
weight_loan1 <- p_cc_loan1 * p_online_loan1 * p_loan1
weight_loan0 <- p_cc_loan0 * p_online_loan0 * p_loan0
p_naive_bayes <- weight_loan1 / (weight_loan1 + weight_loan0)
p_naive_bayes

# Q 5.
# Compare p_naive_bayes (Q4) with p_exact (Q2). With the counts from the
# original run, Q4 gives about 10.07% (0.1006688). The 1.5% originally quoted
# for Q2 was the joint probability 45 / 3000 described above, so it is not
# comparable with Q4; use p_exact instead.
# The difference between the two methods: the exact method needs training
# records with exactly the same combination of predictor values as the
# customer being classified, whereas the naive Bayes method does not.
# Therefore, the exact method is more accurate.

# Q 6.

# The package name must be in plain ASCII quotes; typographic quotes are not
# valid R syntax.
library("e1071")

# Naive Bayes on the training data
# The 0/1 columns are read from the CSV as numbers, and naiveBayes() models a
# numeric predictor as Gaussian: it prints a mean and a standard deviation per
# class instead of P(X = 1 | Loan). Converting every column to a factor makes
# it report conditional probability tables that can be compared with Q3.
# NOTE(review): the formula assumes the loan column is named PersonalLoan --
# confirm against names(Universal_bank_train).
Universal_bank_train_f <- as.data.frame(lapply(Universal_bank_train, factor))
naivebayes <- naiveBayes(PersonalLoan ~ ., data = Universal_bank_train_f)
naivebayes

# With factor predictors, the printed a-priori probabilities and conditional
# probability tables should match the values computed by hand in Q3, so
# plugging them into Bayes' rule reproduces the Q4 probability (0.1006688,
# about 10.07%, with the counts from the original run).
# The figures in the original note, (.457)(.492)(.098) and (.451)(.491)(.902),
# appear to be the per-class standard deviations printed for numeric
# predictors rather than probabilities, and the quoted result 10.06688 was
# presumably meant as 0.1006688.