Cap 3 Barras com duas variáveis
Para que serve: O gráfico de barras é uma maneira de resumir a informação de duas ou mais variáveis qualitativas.
par(bg="#fdf6e3")
load(url("https://raw.githubusercontent.com/DATAUNIRIO/Base_de_dados/master/Titanic.RData"))
#legenda
nomes = levels(Titanic$Classe)
nomes
## [1] "Tripulação" "Primeira" "Segunda" "Terceira"
# percentual da linha ou da coluna
#porcent = prop.table(bartable)*100 # percentual do total
#porcentlinha = prop.table(bartable,1)*100 # percentual da linha
#porcentcoluna = prop.table(bartable,1)*100 # percentual da coluna
bartable = table(Titanic$Classe, Titanic$Sobreviveu) ## get the cross tab
porcentlinha = round(prop.table(bartable,1)*100,1) # percentual da linha arrendondada para uma casa decimal
# Método simples
rotulo=paste(porcentlinha)
barras<-barplot(bartable, beside = TRUE, legend = levels(unique(Titanic$Classe)),col=c("black","skyblue","royalblue","darkblue")) ## plot
text(barras, 0, rotulo,cex=1,pos=3, col ="#ffffff")
par(bg="#fdf6e3")
# Customização
rotulo2=paste(nomes,"\n (",porcentlinha,"%",")",sep="")
barras2<-barplot(bartable, beside = TRUE, legend = levels(unique(Titanic$Classe)),col=c("black","skyblue","royalblue","darkblue")) ## plot
text(barras2, 2, rotulo2,cex=0.8,pos=4, srt=90,col ="#ffffff")
barras3<-barplot(bartable, legend = levels(unique(Titanic$Classe)),col=c("black","skyblue","royalblue","darkblue"))
3.1 Barras no ggplot2
library(ggplot2)
library(reshape2)
x <- c(5,17,31,9,17,10,30,28,16,29,14,34)
y <- c(1,2,3,4,5,6,7,8,9,10,11,12)
day <- c(1,2,3,4,5,6,7,8,9,10,11,12)
df1 <- data.frame(x, y, day)
df2 <- melt(df1, id.vars='day')
ggplot(df2, aes(x=day, y=value, fill=variable)) +
geom_bar(stat='identity', position='dodge')
3.2 Barras “lado a lado” no ggplot2
library("ggplot2")
library("reshape")
x <- c(5,17,31,9,17,10,30,28,16,29,14,34)
y <- c(1,2,3,4,5,6,7,8,9,10,11,12)
day <- c(1,2,3,4,5,6,7,8,9,10,11,12)
df1 <- data.frame(x,y,day)
df2 <- reshape::melt(df1, id = c("day"))
ggplot(data = df2, aes(x = day, y = value, fill = variable)) +
geom_bar(stat = "identity")+ facet_wrap(~ variable) +
scale_x_continuous(breaks=seq(1,12,2))
library("ggplot2")
library("reshape")
x <- c(5,17,31,9,17,10,30,28,16,29,14,34)
y <- c(1,2,3,4,5,6,7,8,9,10,11,12)
day <- c(1,2,3,4,5,6,7,8,9,10,11,12)
df1 <- data.frame(x,y,day)
df2 <- reshape::melt(df1, id = c("day"))
ggplot(data = df2, aes(x = day, y = value, fill = day)) +
geom_bar(stat = "identity") +
facet_wrap(~ variable) +
scale_x_continuous(breaks=seq(1,12,2))
library(dplyr)
avg_mpg <- mtcars %>%
group_by(cyl, am) %>%
summarise(mpg = mean(mpg, na.rm = TRUE))
p1 <- ggplot(avg_mpg, aes(factor(cyl), mpg, fill = factor(am))) +
geom_bar(stat = "identity", position = "dodge") +
ggtitle("Default color comparison")
library(dplyr)
avg_mpg <- mtcars %>%
group_by(cyl, am) %>%
summarise(mpg = mean(mpg, na.rm = TRUE))
# more pleasing colors
p2 <- ggplot(avg_mpg, aes(factor(cyl), mpg, fill = factor(am))) +
geom_bar(stat = "identity", position = "dodge", color = "grey40") +
scale_fill_brewer(palette = "Pastel1") +
ggtitle("Adjusted color comparison")
grid.arrange(p1, p2, ncol = 2)
library(dplyr)
avg_mpg <- mtcars %>%
group_by(cyl, am) %>%
summarise(mpg = mean(mpg, na.rm = TRUE))
p3 <- ggplot(avg_mpg, aes(factor(cyl), mpg, fill = factor(am))) +
geom_bar(stat = "identity")
p3
p1 <- ggplot(mtcars, aes(reorder(row.names(mtcars), mpg), mpg)) +
geom_bar(stat = "identity") +
coord_flip() +
geom_text(aes(label = mpg), nudge_y = 2)
p2 <- ggplot(mtcars, aes(reorder(row.names(mtcars), mpg), mpg)) +
geom_bar(stat = "identity") +
coord_flip() +
geom_text(aes(label = mpg), nudge_y = -2, color = "white")
grid.arrange(p1, p2, ncol = 2)
p1 <- ggplot(avg_mpg, aes(factor(cyl), mpg, fill = factor(am))) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = round(mpg, 1)), position = position_dodge(0.9)) +
ggtitle("Fig A: Default text alignment")
p2 <- ggplot(avg_mpg, aes(factor(cyl), mpg, fill = factor(am))) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = round(mpg, 1)), position = position_dodge(0.9),
vjust = 1.5, color = "white") +
ggtitle("Fig B: Adjusted text alignment")
grid.arrange(p1, p2, ncol = 2)
# compare mpg across all cars and color based on cyl
p1 <- ggplot(mtcars, aes(x = reorder(row.names(mtcars), mpg), y = mpg, fill = factor(cyl))) +
geom_bar(stat = "identity") +
coord_flip() +
theme_minimal() +
ggtitle("Fig. A: Default fill colors")
p1
p2 <- ggplot(mtcars, aes(x = reorder(row.names(mtcars), mpg), y = mpg, fill = factor(cyl))) +
scale_fill_manual(values = c("#e5f5e0", "#a1d99b", "#31a354")) +
geom_bar(stat = "identity") +
coord_flip() +
theme_minimal() +
ggtitle("Fig. B: Manually set fill colors")
p2
3.3 create label location for each proportional bar
# create label location for each proportional bar
proportion <- mtcars %>%
group_by(cyl, am) %>%
tally() %>%
group_by(cyl) %>%
mutate(pct = n / sum(n))
proportion <- proportion %>%
group_by(cyl) %>%
mutate(label_y = cumsum(pct))
p1 <- ggplot(proportion, aes(factor(cyl), pct, fill = factor(am, levels = c(1, 0)))) +
geom_bar(stat = "identity", color = "grey40") +
geom_text(aes(label = round(pct, 2), y = label_y), vjust = 1.5, color = "white") +
scale_fill_manual(values = c("#a1d99b", "#31a354")) +
labs(fill = "AM")
p1
p2 <- ggplot(proportion, aes(factor(cyl), pct, fill = factor(am, levels = c(1, 0)))) +
geom_bar(stat = "identity", position = "dodge", color = "grey40") +
scale_fill_manual(values = c("#a1d99b", "#31a354")) +
geom_text(aes(label = round(pct, 2), y = label_y), position = position_dodge(0.9),
vjust = 1.5, color = "white", family = "Georgia")
p2
p1+ labs(title = "Distribution of Adults by Income in Dayton, OH",
subtitle = "The percentage of adults in the middle class eroded by 5.3% from 2000 to 2014. Although a small \nfraction of these individuals moved into the upper class (+0.5%), the majority of these middle class \nindividuals moved into the lower income class (+4.8%).",
caption = "Source: Pew Research Center analysis of the \n2000 decennial census and 2014 American \nCommunity Survey (IPUMS)")
## Alternativas ao Gráfico de Barras
3.4 MOSAICO
Descrição. Conjunto de retângulo, cada um representa o cruzamento de dois níveis de duas variáveis e o tamanho de cada retângulo é proporcional ao percentual de observações dessa combinação de níveis de variáveis.
# Gráfico de Mosaico ou Mosaicplot
library(RColorBrewer)
par(bg="#fdf6e3")
#display.brewer.all()
COR<-brewer.pal(4,"Dark2")
#COR
# Gráfico de Mosaico ou Mosaicplot
mosaico<-mosaicplot(bartable,col=COR)
3.5 Balloon plot
Balloon plot is an alternative to bar plot for visualizing a large categorical data. We’ll use the function ggballoonplot() [in ggpubr], which draws a graphical matrix of a contingency table, where each cell contains a dot whose size reflects the relative magnitude of the corresponding component.
par(bg="#fdf6e3")
library(ggplot2)
library(ggpubr)
theme_set(theme_pubr())
data("mtcars")
ggballoonplot(mtcars, fill = "value")
COR <- c("#0D0887FF", "#6A00A8FF", "#B12A90FF","#E16462FF", "#FCA636FF", "#F0F921FF")
ggballoonplot(mtcars, fill = "value")+
scale_fill_gradientn(colors = COR)
library(RCurl)
x <- getURL("https://raw.githubusercontent.com/DATAUNIRIO/Base_de_dados/master/Estados.csv")
BaseUF <- read.csv(text=x, header=T, quote="", sep=";",dec = ",")
row.names(BaseUF)<-BaseUF$Estado
BaseUF<-BaseUF[,c(7:15)]
ggballoonplot(BaseUF, fill = "value")
Correspondence analysis
Correspondence analysis can be used to summarize and visualize the information contained in a large contingency table formed by two categorical variables.
Required package: FactoMineR for the analysis and factoextra for the visualization
library(FactoMineR)
library(factoextra)
dt=mtcars[,c(1:7,10)]
res.ca <- CA(dt, graph = FALSE)
fviz_ca_biplot(res.ca, repel = TRUE)