---
# R基本绘图方法 (Basic plotting methods in R)
title: "R Basic Chart"
author:
  - 刘铁柱
documentclass: ctexart
keywords:
  - 中文
  - R Markdown
output:
  rticles::ctex:
    fig_caption: yes
    number_sections: yes
    toc: yes
---

```{r setup, include=FALSE}
# Set the knitr chunk default `echo = TRUE`; individual chunks can still
# override it in their own chunk header.
knitr::opts_chunk$set(echo = TRUE)
```

# R Basic Graphs

From *R in Action*, Third Edition, Chapter 6.

```{r pressure, echo=FALSE}
# Draw the `pressure` data set with base graphics.
plot(pressure)
```

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

## Bar Chart

```{r}
# Load the Arthritis data from the vcd package and tabulate the outcome.
data(Arthritis, package = "vcd")
table(Arthritis$Improved)

library(ggplot2)

# Layers shared by both charts: one bar per level of `Improved`,
# bar height = number of rows.
improved_bars <- ggplot(data = Arthritis, mapping = aes(x = Improved)) +
  geom_bar()

# Simple (vertical) bar chart
improved_bars +
  labs(
    title = "Simple Bar chart",
    x = "Improvement",
    y = "Frequency"
  )

# Horizontal bar chart: same layers with the coordinates flipped
improved_bars +
  labs(
    title = "Horizontal Bar chart",
    x = "Improvement",
    y = "Frequency"
  ) +
  coord_flip()

```

#### Stacked, grouped, and filled bar charts

```{r}
library(ggplot2)

# Cross-tabulate outcome by treatment group
table(Arthritis$Improved, Arthritis$Treatment)

# Mapping shared by all three charts: treatment on x, outcome as fill colour
treatment_plot <- ggplot(Arthritis, aes(x = Treatment, fill = Improved))

# 1. Stacked bar chart: outcome counts piled on top of each other
treatment_plot +
  geom_bar(position = "stack") +
  labs(title = "Stacked Bar chart", x = "Treatment", y = "Frequency")

# 2. Grouped bar chart: outcome counts side by side
treatment_plot +
  geom_bar(position = "dodge") +
  labs(title = "Grouped Bar chart", x = "Treatment", y = "Frequency")

# 3. Filled bar chart: every bar rescaled to the same height
treatment_plot +
  geom_bar(position = "fill") +
  labs(title = "Filled Bar chart", x = "Treatment", y = "Proportion")
```

```{r cars}
# Print summary statistics for the `cars` data set.
summary(cars)
```


#### Bar chart for sorted mean values

1. Generate the mean illiteracy rate for each region.
2. Plot the means in a sorted bar chart.

```{r}

library(dplyr)
library(ggplot2)

# Combine the region factor with the columns of state.x77
states <- data.frame(state.region, state.x77)

# Mean illiteracy rate per region
plotdata <- states %>%
  group_by(state.region) %>%
  summarize(mean = mean(Illiteracy))
plotdata

# Bars are ordered by increasing mean via reorder()
ggplot(plotdata, aes(x = reorder(state.region, mean), y = mean)) +
  geom_col() +
  labs(
    title = "Mean Illiteracy Rate",
    x = "Region",
    y = ""
  )

```

#### Bar chart label

```{r}
# Number of rows per car model; reused by the three variants below
model_counts <- ggplot(mpg, aes(x = model)) +
  geom_bar()

# Default layout
model_counts +
  labs(
    title = "Car models in the mpg dataset",
    x = "",
    y = "Frequency"
  )

# Flipped coordinates so the model names run down the vertical axis
model_counts +
  labs(
    title = "Car models in the mpg dataset",
    x = "",
    y = "Frequency"
  ) +
  coord_flip()

# Rotated, right-aligned and smaller x-axis labels
model_counts +
  labs(
    title = "Model names in the mpg dataset",
    x = "",
    y = "Frequency"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))


```

## Pie Chart

`ggpie(data, x, by, offset, percent, legend, title)`

where:

- data is a data frame.

- x is the categorical variable to be plotted.

- by is an optional second categorical variable. If present, a
pie will be produced for each level of this variable.

- offset is the distance of the pie slice labels from the
origin. A value of 0.5 will place the labels in the center of
the slices, and a value greater than 1.0 will place them
outside the slice.

- percent is logical. If FALSE, percentage printing is
suppressed.

- legend is logical. If FALSE, the legend is omitted, and each
pie slice is labeled.

- title is an optional title.

```{r}

# Install the helper packages only when they are missing. Previously the
# install_github() call sat in its own braced statement after the `if`, so
# ggpie was re-installed from GitHub on every run.
# requireNamespace() checks availability without attaching the package.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
if (!requireNamespace("ggpie", quietly = TRUE)) {
  remotes::install_github("rkabacoff/ggpie")
}

library(ggplot2)
library(ggpie)

# Pie chart of car class with the default options
ggpie(mpg, class)

# No legend; slice labels placed outside the pie (offset > 1)
ggpie(mpg, class, legend = FALSE, offset = 1.3,
      title = "Automobiles by Car Class")

# One pie per level of `year`
ggpie(mpg, class, year,
      legend = FALSE, offset = 1.3, title = "Car Class by Year")

# Base-graphics pie chart of the same class counts, for comparison
pie(table(mpg$class))
```

## Tree maps

```{r}
library(ggplot2)
library(dplyr)
library(treemapify)

# Step 1: number of rows per manufacturer
plotdata <- count(mpg, manufacturer)

# Step 2: one tile per manufacturer, tile area = count, legend hidden
ggplot(
  plotdata,
  aes(area = n, fill = manufacturer, label = manufacturer)
) +
  geom_treemap() +
  geom_treemap_text() +
  theme(legend.position = "none")

```

### Tree map with subgrouping

```{r}
library(ggplot2)
library(dplyr)
library(treemapify)

# Cell counts for every manufacturer / drivetrain combination, with the
# drivetrain codes recoded to readable labels
plotdata <- mpg %>%
  count(manufacturer, drv) %>%
  mutate(
    drv = factor(
      drv,
      levels = c("4", "f", "r"),
      labels = c("4-wheel", "front-wheel", "rear")
    )
  )

# Tree map: tiles are manufacturers, grouped into one subgroup per drivetrain
ggplot(
  plotdata,
  aes(
    area = n,
    fill = manufacturer,
    label = manufacturer,
    subgroup = drv
  )
) +
  geom_treemap() +
  geom_treemap_subgroup_border() +
  geom_treemap_subgroup_text(
    place = "middle",
    colour = "black",
    alpha = 0.5,
    grow = FALSE
  ) +
  geom_treemap_text(
    colour = "white",
    place = "centre",
    grow = FALSE
  ) +
  theme(legend.position = "none")
```


## hist

```{r}
library(lattice)

# Peek at the first rows of the `singer` data
head(singer)

# Histograms of height, one panel per voice part
histogram(
  ~ height | voice.part,
  data = singer,
  main = "Distribution of Heights by Voice Pitch",
  xlab = "Height (inches)"
)
```

```{r}
# Column summaries of the `women` data
summary(women)

# Simple linear regression of weight on height
fit <- lm(formula = weight ~ height, data = women)
summary(fit)

# Scatter plot of the two columns
plot(women)
```

```{r}
# Mileage column used by all three layers of the figure
mileage <- mtcars$mpg

# Density-scaled histogram (freq = FALSE) so the density curve shares its axis
hist(
  mileage,
  freq = FALSE,
  breaks = 12,
  col = "red",
  xlab = "Miles Per Gallon",
  main = "Histogram, rug plot, density curve"
)

# Rug plot of the jittered values
rug(jitter(mileage))

# Kernel density estimate drawn on top
lines(density(mileage), col = "blue", lwd = 2)
```

### Histograms

Histograms display the distribution of a continuous variable by
dividing the range of scores into a specified number of bins on
the x-axis and displaying the frequency of scores in each bin on
the y-axis. You can create histograms using:

`ggplot(data, aes(x = contvar)) + geom_histogram()`

```{r}
library(ggplot2)
library(scales)
data(mpg)

# Keep only the rows for model year 2008
cars2008 <- mpg[mpg$year == 2008, ]

# Simple histogram of city mileage with ggplot2's default binning
ggplot(cars2008, aes(x = cty)) +
  geom_histogram() +
  labs(title = "Default histogram")

# Colored histogram with 20 bins.
# The x label now matches the plotted variable (hwy = highway mileage).
ggplot(cars2008, aes(x = hwy)) +
  geom_histogram(bins = 20, color = "white", fill = "steelblue") +
  labs(
    title = "Colored histogram with 20 bins",
    x = "Highway Miles Per Gallon",
    y = "Frequency"
  )

# Histogram with percentages.
# Bar height is each bin's share of all observations (count / sum(count));
# after_stat(density) only equals that share when the bin width is 1.
ggplot(cars2008, aes(x = hwy, y = after_stat(count / sum(count)))) +
  geom_histogram(bins = 20, color = "white", fill = "steelblue") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Histogram with percentages",
    y = "Percent",
    x = "Highway Miles Per Gallon"
  )

# Histogram with density curve.
# Both layers use the density scale so the curve overlays the bars; the
# y axis therefore shows density formatted as a percentage, not a share.
# The x label now matches the plotted variable (cty = city mileage).
# NOTE(review): `size` for line width is deprecated in newer ggplot2 in favour
# of `linewidth`; kept here so older ggplot2 versions still honour it.
ggplot(cars2008, aes(x = cty, y = after_stat(density))) +
  geom_histogram(bins = 20, color = "white", fill = "steelblue") +
  scale_y_continuous(labels = scales::percent) +
  geom_density(color = "red", size = 1) +
  labs(
    title = "Histogram with density curve",
    y = "Percent",
    x = "City Miles Per Gallon"
  )
```

— *R in Action*, third edition, page 267