In-class 4

# Attach the packages used below (tidyverse for data wrangling, ggstatsplot
# for the statistical plots).
pacman::p_load(tidyverse, ggstatsplot)

# Read the exam data set; the path is relative to this document's location.
exam <- readr::read_csv("../data/Exam_data.csv")
Rows: 322 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ID, CLASS, GENDER, RACE
dbl (3): ENGLISH, MATHS, SCIENCE

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Histogram of ENGLISH scores with a parametric test against a reference
# value of 60. The seed is fixed so that the output is reproducible.
set.seed(1234)
exam %>%
  gghistostats(
    x = ENGLISH,
    type = "parametric",
    test.value = 60,
    # Appearance of the histogram bars.
    bin.args = list(
      color = "black",
      fill = "grey50",
      alpha = 0.7
    ),
    # No normal curve is overlaid; the curve arguments below presumably only
    # matter when `normal.curve` is switched to TRUE.
    normal.curve = FALSE,
    normal.curve.args = list(linewidth = 2),
    xlab = "English scores"
  )

# Same histogram of ENGLISH scores, this time with the non-parametric
# ("np") variant of the test against the reference value of 60.
set.seed(1234)
exam %>%
  gghistostats(
    x = ENGLISH,
    type = "np",
    test.value = 60,
    # Appearance of the histogram bars.
    bin.args = list(
      color = "black",
      fill = "grey50",
      alpha = 0.7
    ),
    # No normal curve is overlaid; the curve arguments below presumably only
    # matter when `normal.curve` is switched to TRUE.
    normal.curve = FALSE,
    normal.curve.args = list(linewidth = 2),
    xlab = "English scores"
  )

# Reshape the exam data from wide to long format: the subject columns
# ENGLISH through SCIENCE are stacked into a SUBJECT / SCORES pair, giving
# one row per student per subject. Only class 3A is kept.
exam_long <- exam %>%
  pivot_longer(
    cols = ENGLISH:SCIENCE,
    names_to = "SUBJECT",
    values_to = "SCORES"
  ) %>%
  filter(CLASS == "3A")

# Preview the first rows of the reshaped data.
head(exam_long)
# A tibble: 6 × 6
  ID         CLASS GENDER RACE    SUBJECT SCORES
  <chr>      <chr> <chr>  <chr>   <chr>    <dbl>
1 Student026 3A    Male   Chinese ENGLISH     68
2 Student026 3A    Male   Chinese MATHS       87
3 Student026 3A    Male   Chinese SCIENCE     66
4 Student021 3A    Male   Chinese ENGLISH     70
5 Student021 3A    Male   Chinese MATHS       90
6 Student021 3A    Male   Chinese SCIENCE     72
# Within-subjects comparison of MATHS and SCIENCE scores using the
# parametric ("p") test. ENGLISH rows are dropped before plotting.
exam_long %>%
  filter(SUBJECT %in% c("MATHS", "SCIENCE")) %>%
  ggwithinstats(
    x = SUBJECT,
    y = SCORES,
    type = "p"
  )
Warning in min(x): no non-missing arguments to min; returning Inf
Warning in max(x): no non-missing arguments to max; returning -Inf

# Scatter plot of ENGLISH against MATHS with marginal plots switched on.
# Points are labelled with the student ID only where both the ENGLISH and
# the MATHS score exceed 90.
exam %>%
  ggscatterstats(
    x = MATHS,
    y = ENGLISH,
    marginal = TRUE,
    label.var = ID,
    label.expression = ENGLISH > 90 & MATHS > 90
  )
Registered S3 method overwritten by 'ggside':
  method from   
  +.gg   ggplot2
`stat_xsidebin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_ysidebin()` using `bins = 30`. Pick better value with `binwidth`.