---
title: "R Basic Chart"
author:
- 刘铁柱
documentclass: ctexart
keywords:
- 中文
- R Markdown
output:
  rticles::ctex:
    fig_caption: yes
    number_sections: yes
    toc: yes
---
```{r setup, include=FALSE}
# Echo the source code of every chunk by default; a chunk can override this
# with its own `echo` option.
knitr::opts_chunk$set(echo = TRUE)
```
# R Basic Graphs
Examples adapted from *R in Action*, Third Edition, Chapter 6.
```{r pressure, echo=FALSE}
# Draw the `pressure` data set with the base graphics plot() generic.
plot(pressure)
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
## Bar Chart
```{r}
# Load the Arthritis data set that ships with the vcd package.
data(Arthritis, package = "vcd")

# Frequency of each improvement level.
table(Arthritis$Improved)

library(ggplot2)

# Layers shared by both charts: one bar per improvement level, height = count.
improved_bars <- ggplot(Arthritis, aes(x = Improved)) +
  geom_bar()

# Simple (vertical) bar chart
improved_bars +
  labs(
    title = "Simple Bar chart",
    x = "Improvement",
    y = "Frequency"
  )

# Horizontal bar chart: same bars with the axes flipped
improved_bars +
  labs(
    title = "Horizontal Bar chart",
    x = "Improvement",
    y = "Frequency"
  ) +
  coord_flip()
```
#### Stacked, grouped, and filled bar charts
```{r}
library(ggplot2)

# Cross-tabulation of improvement level by treatment group.
table(Arthritis$Improved, Arthritis$Treatment)

# Common mapping for all three charts: one bar (group) per treatment,
# coloured by improvement level. Only the bar position differs below.
treatment_plot <- ggplot(Arthritis, aes(x = Treatment, fill = Improved))

# 1. Stacked bar chart: improvement levels piled on top of each other
treatment_plot +
  geom_bar(position = "stack") +
  labs(
    title = "Stacked Bar chart",
    x = "Treatment",
    y = "Frequency"
  )

# 2. Grouped bar chart: improvement levels placed side by side
treatment_plot +
  geom_bar(position = "dodge") +
  labs(
    title = "Grouped Bar chart",
    x = "Treatment",
    y = "Frequency"
  )

# 3. Filled bar chart: each bar rescaled to the same total height
treatment_plot +
  geom_bar(position = "fill") +
  labs(
    title = "Filled Bar chart",
    x = "Treatment",
    y = "Proportion"
  )
```
```{r cars}
# Print the summary() of the `cars` data set.
summary(cars)
```
#### Bar chart for sorted mean values
❶ Generates means by region
❷ Plots means in a sorted bar chart
```{r}
library(dplyr)
library(ggplot2)

# Put the region of each state next to its state.x77 statistics.
states <- data.frame(state.region, state.x77)

# Mean illiteracy rate within each region.
by_region <- group_by(states, state.region)
plotdata <- summarize(by_region, mean = mean(Illiteracy))
plotdata

# Bar chart of the regional means, with regions ordered by their mean.
ggplot(plotdata, aes(x = reorder(state.region, mean), y = mean)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Mean Illiteracy Rate",
    x = "Region",
    y = ""
  )
```
#### Bar chart label
```{r}
# Layers shared by the three charts: number of rows per car model in mpg.
model_bars <- ggplot(mpg, aes(x = model)) +
  geom_bar()

# Default layout: the model names sit on the x-axis.
model_bars +
  labs(
    title = "Car models in the mpg dataset",
    y = "Frequency",
    x = ""
  )

# Flipped axes: the model names run down the side instead.
model_bars +
  labs(
    title = "Car models in the mpg dataset",
    y = "Frequency",
    x = ""
  ) +
  coord_flip()

# Upright bars with the x-axis labels rotated 45 degrees and set in a
# smaller font.
model_bars +
  labs(
    title = "Model names in the mpg dataset",
    y = "Frequency",
    x = ""
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))
```
## Pie Chart
The general form is `ggpie(data, x, by, offset, percent, legend, title)`, where:

- data is a data frame.
- x is the categorical variable to be plotted.
- by is an optional second categorical variable. If present, a
pie will be produced for each level of this variable.
- offset is the distance of the pie slice labels from the
origin. A value of 0.5 will place the labels in the center of
the slices, and a value greater than 1.0 will place them
outside the slice.
- percent is logical. If FALSE, percentage printing is
suppressed.
- legend is logical. If FALSE, the legend is omitted, and each
pie slice is labeled.
- title is an optional title.
```{r}
# Install ggpie from GitHub only when it is not already available. The
# original ran remotes::install_github() unconditionally, reinstalling the
# package on every knit.
# NOTE(review): an unrelated package named ggpie may also exist on CRAN;
# confirm the installed one is rkabacoff/ggpie, whose ggpie() has the
# signature used below.
if (!requireNamespace("ggpie", quietly = TRUE)) {
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_github("rkabacoff/ggpie")
}

library(ggplot2)
library(ggpie)

# Pie chart of car classes with default settings.
ggpie(mpg, class)

# No legend; slice labels placed at offset 1.3.
ggpie(mpg, class,
      legend = FALSE, offset = 1.3,
      title = "Automobiles by Car Class")

# Car class with year as the third argument, same label settings.
ggpie(mpg, class, year,
      legend = FALSE, offset = 1.3, title = "Car Class by Year")

# Base-graphics pie chart of the same class counts.
pie(table(mpg$class))
```
## Tree maps
```{r}
library(ggplot2)
library(dplyr)
library(treemapify)

# 1. Summarize the data: number of rows in mpg per manufacturer (column n).
plotdata <- count(mpg, manufacturer)

# 2. Create the tree map: tile area follows the count, and each tile is
#    filled and labelled by manufacturer. The legend is hidden.
ggplot(
  plotdata,
  aes(
    area = n,
    fill = manufacturer,
    label = manufacturer
  )
) +
  geom_treemap() +
  geom_treemap_text() +
  theme(legend.position = "none")
```
### Tree map with subgrouping
```{r}
library(ggplot2)
library(dplyr)
library(treemapify)

# 1. Compute cell counts for every manufacturer / drivetrain combination.
plotdata <- count(mpg, manufacturer, drv)

# 2. Replace the drivetrain codes with more readable labels.
drv_codes <- c("4", "f", "r")
drv_names <- c("4-wheel", "front-wheel", "rear")
plotdata$drv <- factor(plotdata$drv, levels = drv_codes, labels = drv_names)

# 3. Create the tree map: tiles are grouped by drivetrain, each group gets a
#    border and a semi-transparent black caption, and each tile carries its
#    manufacturer name in white.
ggplot(
  plotdata,
  aes(
    area = n,
    fill = manufacturer,
    label = manufacturer,
    subgroup = drv
  )
) +
  geom_treemap() +
  geom_treemap_subgroup_border() +
  geom_treemap_subgroup_text(
    place = "middle",
    colour = "black",
    alpha = 0.5,
    grow = FALSE
  ) +
  geom_treemap_text(
    colour = "white",
    place = "centre",
    grow = FALSE
  ) +
  theme(legend.position = "none")
```
## hist
```{r}
library(lattice)

# Peek at the first rows of the singer data.
head(singer)

# One histogram panel of height per voice part.
histogram(
  ~ height | voice.part,
  data = singer,
  xlab = "Height (inches)",
  main = "Distribution of Heights by Voice Pitch"
)
```
```{r}
# Summary statistics for the women data set.
summary(women)

# Simple linear regression of weight on height.
fit <- lm(formula = weight ~ height, data = women)
summary(fit)

# Plot of the raw data.
plot(women)
```
```{r}
# Histogram on the density scale (freq = FALSE) so that the density curve
# added below shares its y-axis.
hist(
  mtcars$mpg,
  breaks = 12,
  freq = FALSE,
  col = "red",
  main = "Histogram, rug plot, density curve",
  xlab = "Miles Per Gallon"
)

# Rug plot of the observations, passed through jitter() first.
rug(jitter(mtcars$mpg))

# Overlay the density estimate as a blue line of width 2.
lines(density(mtcars$mpg), col = "blue", lwd = 2)
```
### Histograms
Histograms display the distribution of a continuous variable by
dividing the range of scores into a specified number of bins on
the x-axis and displaying the frequency of scores in each bin on
the y-axis. You can create histograms using:

`ggplot(data, aes(x = contvar)) + geom_histogram()`

```{r}
library(ggplot2)
library(scales)
data(mpg)

# Keep only the rows for model year 2008.
cars2008 <- mpg[mpg$year == 2008, ]

# Simple histogram of city mileage (cty) with the default binning.
ggplot(cars2008, aes(x = cty)) +
  geom_histogram() +
  labs(title = "Default histogram")

# Colored histogram with 20 bins.
# hwy is highway mileage, so the x-axis is labelled accordingly (the
# original label said "City").
ggplot(cars2008, aes(x = hwy)) +
  geom_histogram(bins = 20, color = "white", fill = "steelblue") +
  labs(title = "Colored histogram with 20 bins",
       x = "Highway Miles Per Gallon",
       y = "Frequency")

# Histogram with percentages.
# Each bar is its share of all observations (count / sum(count)). The
# original plotted density, which equals a proportion only when the bin
# width is exactly 1.
ggplot(cars2008, aes(x = hwy, y = after_stat(count / sum(count)))) +
  geom_histogram(bins = 20, color = "white", fill = "steelblue") +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Histogram with percentages",
       y = "Percent",
       x = "Highway Miles Per Gallon")

# Histogram with density curve.
# y stays on the density scale so the bars and the density curve share one
# axis. cty is city mileage, so the x-axis label is corrected to "City".
# NOTE(review): the y-axis is still labelled "Percent" although it shows
# density formatted as a percentage - confirm whether "Density" is wanted.
ggplot(cars2008, aes(x = cty, y = after_stat(density))) +
  geom_histogram(bins = 20, color = "white", fill = "steelblue") +
  scale_y_continuous(labels = scales::percent) +
  geom_density(color = "red", size = 1) +
  labs(title = "Histogram with density curve",
       y = "Percent",
       x = "City Miles Per Gallon")
```
-《R in Action》, third edition, page 267