After World War II, antibiotics were considered “wonder drugs” because they were an easy remedy for what had been intractable ailments. To learn which drug worked most effectively against which bacterial infection, the performance of the three most popular antibiotics on 16 bacteria was gathered.
The values in the table are minimum inhibitory concentrations (MIC), a measure of the effectiveness of the antibiotic: the concentration of antibiotic required to prevent growth in vitro. The reaction of each bacterium to Gram staining is described by the covariate “Gram-Staining”. Bacteria that are stained dark blue or violet are Gram-positive. Otherwise, they are Gram-negative.
A lower MIC value indicates that less drug is required for inhibiting growth of the organism; therefore, drugs with lower MIC scores are more effective antimicrobial agents.
Key: (Variable Type = Count (if applicable)) ~ Proposed Encoding
# Load the raw MIC table (one row per bacterium, one column per antibiotic);
# `col_types = cols()` lets readr guess the column types without printing them.
raw_antibio <- read_csv("antibiotics_data.csv", col_types = cols())

# For every bacterium, record which antibiotic column holds the largest MIC.
# NOTE: "Top" here means the HIGHEST MIC value, i.e. the antibiotic that needs
# the most drug, not the most effective one.
antibio <- raw_antibio %>%
  group_by(Bacteria) %>%
  mutate(
    # max.col() breaks ties at random by default, so bacteria with two equal
    # maximum MICs could get a different answer on every run. "first" makes
    # the result reproducible (leftmost tied column wins).
    TA_num = across(c(Penicilin, Streptomycin, Neomycin)) %>%
      max.col(ties.method = "first"),
    # Translate the column position back into the antibiotic's name.
    Top_Antibiotic_Name = case_when(
      TA_num == 1 ~ "Penicilin",
      TA_num == 2 ~ "Streptomycin",
      TA_num == 3 ~ "Neomycin"
    )
  )
# Reshape from wide to long: the three antibiotic columns collapse into an
# `Antibiotic` name column and an `MIC` value column, giving one row per
# bacterium/antibiotic pair.
pivot_antibio <- pivot_longer(
  antibio,
  cols = Penicilin:Neomycin,
  names_to = "Antibiotic",
  values_to = "MIC"
)
# Print the reshaped table.
pivot_antibio
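# Goal: for each antibiotic row, a yes/no flag for "is this the top one?" plus its
# rank relative to the other antibiotics for the same bacterium.
# Right now the column only says WHICH antibiotic is the top; I want the value
# encoded relative to the antibiotic in that row.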
# pivot_antibio
# antibio
# Row-wise counterpart of max.col(): for each row of `m`, return the position
# of the column holding the smallest value.
#
# m   : numeric matrix (or something max.col() can coerce to one).
# ... : forwarded unchanged to max.col(), e.g. `ties.method`.
min.col <- function(m, ...) {
  # The smallest entry of a row is the largest entry of the negated row.
  negated <- -m
  max.col(negated, ...)
}
# For every bacterium, name the antibiotic with the highest MIC ("Top"), the
# lowest MIC ("Bottom") and the remaining one ("Mid").
# NOTE: Top/Bottom refer to the MIC value itself, so "Top" is the antibiotic
# that needs the MOST drug.
scored_antibio <- raw_antibio %>%
  group_by(Bacteria) %>%
  mutate(
    # max.col() (and min.col(), which forwards to it) break ties at random by
    # default, so tied MICs produced a different ranking on every run.
    # Using "first" for the maximum and "last" for the minimum is
    # deterministic AND guarantees the two never pick the same column, even
    # when all three MICs are equal, so every antibiotic gets exactly one rank.
    TA_num_max = across(c(Penicilin, Streptomycin, Neomycin)) %>%
      max.col(ties.method = "first"),
    TA_num_min = across(c(Penicilin, Streptomycin, Neomycin)) %>%
      min.col(ties.method = "last"),
    # Translate column positions back into antibiotic names.
    Top_Antibiotic_Name = case_when(
      TA_num_max == 1 ~ "Penicilin",
      TA_num_max == 2 ~ "Streptomycin",
      TA_num_max == 3 ~ "Neomycin"
    ),
    Bottom_Antibiotic_Name = case_when(
      TA_num_min == 1 ~ "Penicilin",
      TA_num_min == 2 ~ "Streptomycin",
      TA_num_min == 3 ~ "Neomycin"
    ),
    # The middle antibiotic is the one that is neither the max nor the min.
    Mid_Antibiotic_Name = case_when(
      TA_num_max != 1 & TA_num_min != 1 ~ "Penicilin",
      TA_num_max != 2 & TA_num_min != 2 ~ "Streptomycin",
      TA_num_max != 3 & TA_num_min != 3 ~ "Neomycin"
    )
  )
# Long format of the scored table: one row per bacterium/antibiotic pair, with
# the antibiotic's name in `Antibiotic` and its value in `MIC`.
pivot_scored_antibio <- pivot_longer(
  scored_antibio,
  cols = Penicilin:Neomycin,
  names_to = "Antibiotic",
  values_to = "MIC"
)
# Encode, for each bacterium/antibiotic row, where that antibiotic's MIC falls
# relative to the other two antibiotics for the same bacterium:
#   3 / "Most"   -> row's antibiotic is Top_Antibiotic_Name (highest MIC)
#   2 / "Middle" -> row's antibiotic is Mid_Antibiotic_Name
#   1 / "Least"  -> row's antibiotic is Bottom_Antibiotic_Name (lowest MIC)
# Only the identifying columns and the two new encodings are kept.
pivot_scored_antibio <- pivot_scored_antibio %>%
  mutate(
    relative_score = case_when(
      Antibiotic == Top_Antibiotic_Name ~ 3,
      Antibiotic == Mid_Antibiotic_Name ~ 2,
      Antibiotic == Bottom_Antibiotic_Name ~ 1
    ),
    # Derive the label from the numeric score instead of re-testing the names
    # in a different order; previously a row matching more than one name could
    # end up with score 3 but label "Least".
    relative_score_term = case_when(
      relative_score == 3 ~ "Most",
      relative_score == 2 ~ "Middle",
      relative_score == 1 ~ "Least"
    )
  ) %>%
  select(
    Bacteria,
    `Gram-Staining`,
    Antibiotic,
    MIC,
    relative_score,
    relative_score_term
  )
# Attach per-antibiotic tallies to every row: how many rows of that antibiotic
# have relative_score 1 (n_best), 2 (n_mid) and 3 (n_worst). The grouping is
# dropped again afterwards.
pivot_scored_antibio <- ungroup(
  mutate(
    group_by(pivot_scored_antibio, Antibiotic),
    n_best = sum(relative_score == 1),
    n_mid = sum(relative_score == 2),
    n_worst = sum(relative_score == 3)
  )
)
# Per antibiotic, the SHARE of its rows that have relative_score 1, 2 and 3.
# The columns keep their n_* names, but the values are proportions.
# The denominator is the number of rows in the group (n()) rather than a
# hard-coded 16, so the shares stay correct if bacteria are added or removed.
rel_counts <- pivot_scored_antibio %>%
  group_by(Antibiotic) %>%
  summarise(
    n_best = sum(relative_score == 1) / n(),
    n_mid = sum(relative_score == 2) / n(),
    n_worst = sum(relative_score == 3) / n()
  )
# Print the summary table.
rel_counts
# Save the scored long-format table to disk.
# (Earlier version exported the unscored table instead:
#  write_csv(pivot_antibio, "antibiotics_data_pivoted.csv"))
pivot_scored_antibio %>%
  write_csv("antibiotics_data_pivoted.csv")
# Column-by-column summary of the wide table.
summary(antibio)
## Bacteria Penicilin Streptomycin Neomycin
## Length:16 Min. : 0.0010 Min. : 0.010 Min. : 0.001
## Class :character 1st Qu.: 0.0065 1st Qu.: 0.325 1st Qu.: 0.017
## Mode :character Median : 1.0000 Median : 1.000 Median : 0.100
## Mean :217.8781 Mean : 3.065 Mean : 4.089
## 3rd Qu.:275.0000 3rd Qu.: 2.750 3rd Qu.: 1.700
## Max. :870.0000 Max. :14.000 Max. :40.000
## Gram-Staining TA_num Top_Antibiotic_Name
## Length:16 Min. :1.000 Length:16
## Class :character 1st Qu.:1.000 Class :character
## Mode :character Median :1.000 Mode :character
## Mean :1.438
## 3rd Qu.:2.000
## Max. :3.000
Each row corresponds to a unique bacterium:
# TRUE when no bacterium name occurs more than once.
anyDuplicated(antibio$Bacteria) == 0L
## [1] TRUE
# trying to rank the antibiotic for each bacteria
# pivot_antibio %>%
# group_by(Bacteria, Antibiotic) %>%
# summarise(
# MIC,
# max.col()
# )
# pivot_antibio %>%
# group_by(Bacteria) %>%
# summarise(
# MIC
# )
# antibio %>%
# mutate(
# best = max.col(Penicilin, Streptomycin, Neomycin)
# )
# pivot_antibio
# antibio %>%
# group_by(Bacteria) %>%
# summarise(
# Bacteria,
# Penicilin, Streptomycin, Neomycin,
# max(Penicilin, Streptomycin, Neomycin)
# )
# antibio %>%
# mutate(
# BestAntibiotic = case_when(
#
# )
# )
# pivot_antibio %>% glimpse()
# Point plot of MIC per bacterium on a natural-log y axis, one panel per
# antibiotic. Point shape identifies the bacterium and colour its
# Gram-staining result, so the x-axis ticks and labels are suppressed.
# Alternatives tried earlier (geom_col, a log-scaled x axis,
# scale_shape_prism) are no longer used.
ggplot(
  pivot_antibio,
  aes(x = Bacteria, y = MIC, shape = `Bacteria`, color = `Gram-Staining`)
) +
  geom_point() +
  facet_wrap(~ Antibiotic) +
  # One distinct point shape per bacterium.
  scale_shape_manual(values = 1:16) +
  scale_y_continuous(trans = "log") +
  labs(x = "", y = "Minimum Inhibitory Concentration (MIC)") +
  theme(axis.ticks = element_blank(), axis.text.x = element_blank())