After World War II, antibiotics were considered “wonder drugs”, since they were an easy remedy for what had been intractable ailments. To learn which drug worked most effectively against which bacterial infection, the performance of the three most popular antibiotics on 16 bacteria was gathered.

The values in the table represent the minimum inhibitory concentration (MIC), a measure of the effectiveness of the antibiotic, which represents the concentration of antibiotic required to prevent growth in vitro. The reaction of the bacteria to Gram staining is described by the covariate “gram staining”. Bacteria that are stained dark blue or violet are Gram-positive. Otherwise, they are Gram-negative.

A lower MIC value indicates that less drug is required for inhibiting growth of the organism; therefore, drugs with lower MIC scores are more effective antimicrobial agents.

Variables

Key: (Variable Type = Count (if applicable)) ~ Proposed Encoding

# Load the raw table: one row per bacterium, one MIC column per antibiotic.
raw_antibio <- read_csv("antibiotics_data.csv", col_types = cols())

# For every bacterium, record which antibiotic column holds the largest MIC.
# Because a lower MIC means a more effective drug, "Top" here means
# "highest MIC", not "best drug".
#
# max.col() already works row by row, so no grouping is required. Its default
# ties.method is "random", which would make the label change between runs
# whenever two antibiotics share the largest MIC; "first" keeps the result
# reproducible by picking the left-most tied column.
antibio <- raw_antibio %>%
  mutate(
    TA_num = max.col(
      cbind(Penicilin, Streptomycin, Neomycin),
      ties.method = "first"
    ),
    # Translate the column index (1, 2, 3) back into the antibiotic name.
    Top_Antibiotic_Name = c("Penicilin", "Streptomycin", "Neomycin")[TA_num]
  )

# Long format: one row per (bacterium, antibiotic) pair with its MIC.
pivot_antibio <- antibio %>%
  pivot_longer(
    cols = Penicilin:Neomycin,
    names_to = "Antibiotic",
    values_to = "MIC"
  )
pivot_antibio
# want per antibiotic row -> Y/n top; Place relative to others
# right now it's WHICH is the top, but I want it encoded relative to the antibiotic in that row

# pivot_antibio
# antibio

# Row-wise counterpart of max.col(): for each row of `m`, return the index of
# the column holding the smallest value. Implemented by negating `m` and
# delegating to max.col(); any extra arguments (e.g. `ties.method`) are passed
# straight through.
min.col <- function(m, ...) {
  negated <- -m
  max.col(negated, ...)
}

# Rank the three antibiotics for every bacterium by MIC.
#   Top_*    -> antibiotic with the highest MIC
#   Bottom_* -> antibiotic with the lowest MIC
#   Mid_*    -> the remaining antibiotic
#
# The MIC values are pulled into a matrix once and shared by both lookups.
# max.col() (and therefore min.col()) is already row-wise, so no grouping is
# needed, and ties.method = "first" replaces the default random tie-breaking
# so that tied MIC values give the same ranking on every run.
antibiotic_names <- c("Penicilin", "Streptomycin", "Neomycin")
mic_matrix <- as.matrix(raw_antibio[antibiotic_names])

scored_antibio <- raw_antibio %>%
  mutate(
    TA_num_max = max.col(mic_matrix, ties.method = "first"),
    TA_num_min = min.col(mic_matrix, ties.method = "first"),
    # Translate column indices (1, 2, 3) back into antibiotic names.
    Top_Antibiotic_Name = antibiotic_names[TA_num_max],
    Bottom_Antibiotic_Name = antibiotic_names[TA_num_min],
    # First antibiotic that is neither the max nor the min column.
    Mid_Antibiotic_Name = case_when(
      TA_num_max != 1 & TA_num_min != 1 ~ "Penicilin",
      TA_num_max != 2 & TA_num_min != 2 ~ "Streptomycin",
      TA_num_max != 3 & TA_num_min != 3 ~ "Neomycin"
    )
  )

# Build the long, scored table in a single pipeline:
#   1. one row per (bacterium, antibiotic) pair;
#   2. score each row by where its antibiotic falls in that bacterium's MIC
#      ordering (3 / "Most" = highest MIC, 1 / "Least" = lowest MIC);
#   3. keep only the reporting columns;
#   4. attach, per antibiotic, how often it landed in each position
#      (n_best counts score 1, n_worst counts score 3).
pivot_scored_antibio <- scored_antibio %>%
  pivot_longer(
    cols = Penicilin:Neomycin,
    names_to = "Antibiotic",
    values_to = "MIC"
  ) %>%
  mutate(
    relative_score = case_when(
      Antibiotic == Top_Antibiotic_Name ~ 3,
      Antibiotic == Mid_Antibiotic_Name ~ 2,
      Antibiotic == Bottom_Antibiotic_Name ~ 1
    ),
    relative_score_term = case_when(
      Antibiotic == Bottom_Antibiotic_Name ~ "Least",
      Antibiotic == Mid_Antibiotic_Name ~ "Middle",
      Antibiotic == Top_Antibiotic_Name ~ "Most"
    )
  ) %>%
  select(
    Bacteria,
    `Gram-Staining`,
    Antibiotic,
    MIC,
    relative_score,
    relative_score_term
  ) %>%
  group_by(Antibiotic) %>%
  mutate(
    n_best = sum(relative_score == 1),
    n_mid = sum(relative_score == 2),
    n_worst = sum(relative_score == 3)
  ) %>%
  ungroup()

# Share of bacteria for which each antibiotic had the lowest (n_best),
# middle (n_mid) or highest (n_worst) MIC. Despite the n_ prefix these are
# proportions: mean() of a logical vector is the fraction of TRUE values, so
# the denominator follows the number of rows per antibiotic instead of being
# hard-coded to 16.
rel_counts <- pivot_scored_antibio %>%
  group_by(Antibiotic) %>%
  summarise(
    n_best = mean(relative_score == 1),
    n_mid = mean(relative_score == 2),
    n_worst = mean(relative_score == 3)
  )
rel_counts
# write_csv(pivot_antibio, "antibiotics_data_pivoted.csv")
# Export the scored long table.
write_csv(pivot_scored_antibio, "antibiotics_data_pivoted.csv")

Summary

# Per-column summary of the wide table (numeric quantiles, character lengths).
summary(antibio)
##    Bacteria           Penicilin         Streptomycin       Neomycin     
##  Length:16          Min.   :  0.0010   Min.   : 0.010   Min.   : 0.001  
##  Class :character   1st Qu.:  0.0065   1st Qu.: 0.325   1st Qu.: 0.017  
##  Mode  :character   Median :  1.0000   Median : 1.000   Median : 0.100  
##                     Mean   :217.8781   Mean   : 3.065   Mean   : 4.089  
##                     3rd Qu.:275.0000   3rd Qu.: 2.750   3rd Qu.: 1.700  
##                     Max.   :870.0000   Max.   :14.000   Max.   :40.000  
##  Gram-Staining          TA_num      Top_Antibiotic_Name
##  Length:16          Min.   :1.000   Length:16          
##  Class :character   1st Qu.:1.000   Class :character   
##  Mode  :character   Median :1.000   Mode  :character   
##                     Mean   :1.438                      
##                     3rd Qu.:2.000                      
##                     Max.   :3.000

Each row is a unique bacterium:

# TRUE when no bacterium name appears more than once
# (anyDuplicated() returns 0 if there are no duplicates).
anyDuplicated(antibio$Bacteria) == 0L
## [1] TRUE
# trying to rank the antibiotic for each bacteria
# pivot_antibio %>%
#   group_by(Bacteria, Antibiotic) %>%
#   summarise(
#     MIC,
#     max.col()
#   )

# pivot_antibio %>%
#   group_by(Bacteria) %>%
#   summarise(
#     MIC
#   )
# antibio %>% 
#   mutate(
#     best = max.col(Penicilin, Streptomycin, Neomycin)
#   )

# pivot_antibio
# antibio %>% 
#   group_by(Bacteria) %>% 
#   summarise(
#     Bacteria,
#     Penicilin, Streptomycin, Neomycin,
#     max(Penicilin, Streptomycin, Neomycin)
#   )

# antibio %>% 
#   mutate(
#     BestAntibiotic = case_when(
#       
#     )
#   )
# pivot_antibio %>% glimpse()

# One panel per antibiotic: each bacterium is a point (distinct shape),
# coloured by Gram staining, with MIC on a log-transformed y axis.
ggplot(
  pivot_antibio,
  aes(x = Bacteria, y = MIC, shape = Bacteria, color = `Gram-Staining`)
) +
  geom_point() +
  facet_wrap(~ Antibiotic) +
  # MIC values span several orders of magnitude, hence the log transform.
  scale_y_continuous(trans = "log") +
  # One point shape per bacterium (shape codes 1 to 16).
  scale_shape_manual(values = 1:16) +
  # Bacteria are identified by the shape legend, so the x axis text and all
  # axis ticks are hidden.
  theme(axis.ticks = element_blank(), axis.text.x = element_blank()) +
  labs(x = "", y = "Minimum Inhibitory Concentration (MIC)")