# Clear objects left over from earlier work in this session.
# NOTE(review): rm() removes objects only; loaded packages, attached data and
# changed options persist, so running the script in a fresh R session is the
# more reliable way to get a clean, reproducible run.
rm(list = ls())

# Warnings are left at R's default level on purpose: globally silencing them
# would hide problems raised by the import and by the statistical tests below.

library(readxl)

## Reading the data from Excel

# Import the project data. The path is relative, so the workbook must be in
# the current working directory.
# The file name has to be delimited by plain ASCII double quotes; typographic
# ("curly") quotes are not valid R string delimiters.
Project_2_Data <- read_excel("Stat 481 Project 2 Data.xls")

# Fail early with a clear message if a column used later in the script is
# missing from the workbook.
stopifnot(all(c("score", "courses", "gender") %in% names(Project_2_Data)))

# Show the column types that were inferred on import.
str(Project_2_Data)

## Cleaning and labelling the data

# Convert the two grouping columns to factors and give their levels readable
# labels.
# NOTE(review): `levels<-` relabels the existing levels in their sorted order,
# so the first sorted raw code becomes "Female" / "Algebra", the next one
# "Male" / "Algebra&Geometry", and so on -- TODO confirm this mapping against
# the data dictionary.
Project_2_Data$courses <- as.factor(Project_2_Data$courses)
Project_2_Data$gender <- as.factor(Project_2_Data$gender)

levels(Project_2_Data$gender) <- c("Female", "Male")
levels(Project_2_Data$courses) <- c("Algebra", "Algebra&Geometry", "Calculus")

# The data frame is deliberately not attach()ed: columns are reached through
# `data =` arguments or with(), so a stray global variable named `score`
# cannot silently mask the column.

## Descriptives

library(ggplot2)

library(hrbrthemes)

library(dplyr)

library(tidyr)

library(viridis)

# Mean score for every course-by-gender cell.
temp <- aggregate(score ~ courses + gender, data = Project_2_Data, FUN = mean)
print(temp)

# Normal Q-Q plot of the raw scores.
with(Project_2_Data, qqnorm(score))

# Histogram of the scores; 30 bins is the geom_histogram() default, stated
# explicitly here.
print(ggplot(Project_2_Data, aes(x = score)) + geom_histogram(bins = 30))

summary(Project_2_Data)

# Overlaid score densities, split by course and by gender.
p1 <- ggplot(data = Project_2_Data, aes(x = score, fill = courses)) +
  geom_density(adjust = 1.5, alpha = .4) +
  theme_ipsum()

p2 <- ggplot(data = Project_2_Data, aes(x = score, fill = gender)) +
  geom_density(adjust = 1.5, alpha = .4) +
  theme_ipsum()

# A ggplot object is only drawn when it is printed.
print(p1)
print(p2)

## Model

## Test of normality and other assumptions

# One-sample Kolmogorov-Smirnov test against a normal distribution.
# NOTE(review): the mean and sd are estimated from the same sample, so the
# reported p-value is only approximate (see ?ks.test); treat it as a rough
# screen rather than an exact test.
with(
  Project_2_Data,
  ks.test(score, "pnorm", mean = mean(score), sd = sd(score))
)

# Homogeneity of variance across the levels of each factor.
bartlett.test(score ~ courses, data = Project_2_Data)

bartlett.test(score ~ gender, data = Project_2_Data)

## Linear model

# Fit the additive two-factor ANOVA (main effects of course and gender, no
# interaction term). The model has to be fitted with aov(): anova() compares
# or tabulates already-fitted models and has no method for a bare formula,
# and PostHocTest() below needs an aov fit as its input.
model1 <- aov(score ~ courses + gender, data = Project_2_Data)

# Sums of squares and degrees of freedom of the fit.
model1

# ANOVA table with F statistics and p-values.
summary(model1)

## Post Hoc

library(DescTools)

# Pairwise comparisons of the factor levels with Bonferroni adjustment.
PostHocTest(model1, method = "bonferroni")

# Pairwise comparisons using Tukey's honest significant differences.
PostHocTest(model1, method = "hsd")