Introduction to R

Ying-Ju Tessa Chen
ychen4@udayton.edu
University of Dayton

README

We facilitated a remote mini course on learning R at the University of Dayton in Summer 2020. To the extent possible, the content of the meetings is recorded here.


Session 1: Overview of R and RStudio, Basic Syntax and Data Types

Brief Overview

In the first session, we will give a short overview of the interfaces of R and RStudio. Then we will talk about basic syntax and data types in R.

Interface of RStudio

Basic Syntax

In this section, we introduce some basic syntax in R.

The # symbol is used to add comments and notes to your code. On any line of code, anything after # will not be executed.

# Hello, everyone!
3+5
## [1] 8
3-5
## [1] -2
3 - 5
## [1] -2
3*5
## [1] 15
3/5
## [1] 0.6
3 / 5
## [1] 0.6

Note: you should find that 3/5 and 3 / 5 generate the same result. This is because blank spaces in the code are generally ignored.

a <- 8
3 -> b
a
## [1] 8
b
## [1] 3
a = 8
b = 3
a
## [1] 8
b
## [1] 3

What's the difference between them?

mean(x = c(1, 4, 7, 9, 19))
## [1] 8
x
## Error in eval(expr, envir, enclos): object 'x' not found
mean(x <- c(1, 4, 7, 9, 19))
## [1] 8
x
## [1]  1  4  7  9 19

In the first case, \(x\) is an argument of the function mean(), while the second case assigns the vector \((1, 4, 7, 9, 19)\) to \(x\) and then finds its mean value.

We should use <- as an assignment operator and = for function arguments!

Parentheses, ( ), are used to call functions; brackets, [ ], are used to obtain values in a data structure; and curly brackets, { }, are used to denote a block of code in a function or in a conditional statement.

Here, we give examples about the use of ( ) and [ ]. The use of curly brackets will be introduced later.

w <- c(39, 61, 9, 17, 25, 56, 47, 62, 71, 100, 1, 42) # c() combines objects
median(w) # find the median of w
## [1] 44.5
w[3] # find the value of the third element in w
## [1] 9
w[1:2] # find the values of the first and second elements in w
## [1] 39 61
w[2:4] # find the values between the second and the fourth elements in w
## [1] 61  9 17
w[c(2,5,8)] # only find the values of the second, fifth and eighth elements in w
## [1] 61 25 62
w[-5] # the fifth element in w is removed
##  [1]  39  61   9  17  56  47  62  71 100   1  42
w[w < 50] # only obtain values that satisfy the condition
## [1] 39  9 17 25 47  1 42

Note: c() can concatenate more than just vectors. We will talk about this later.

Basic Data Types

In this section, we introduce the basic data types in R.

x <- "Hello"
y <- "World!"
class(x) # we can use class() function to obtain the data type
## [1] "character"
nchar(x) # use nchar() to count the number of characters
## [1] 5

Note: When defining strings, double quotes " " and single quotes ' ' are interchangeable, but double quotes are preferred (and character constants are printed using double quotes), so single quotes are normally only used to delimit character constants containing double quotes (R Documentation, 2020).
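For example (a minimal illustration), single quotes are convenient when the string itself contains double quotes:

s1 <- 'She said "Hi" to everyone.'   # single quotes delimit a string containing double quotes
s2 <- "She said \"Hi\" to everyone." # the same string, with the inner double quotes escaped
identical(s1, s2)
## [1] TRUE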

If we want to combine two strings into one string, we can use paste() or paste0() function.

paste(x,y)
## [1] "Hello World!"
paste(x,y,sep=",")
## [1] "Hello,World!"
paste(x,y,sep=", ")
## [1] "Hello, World!"
paste(x, ", ", y)
## [1] "Hello ,  World!"
paste0(x,y)
## [1] "HelloWorld!"

These two functions could be very useful. Here we give one example.

allfiles1 <- paste("file_", 1:5)
allfiles2 <- paste("file_", 1:5, collapse = "_")
allfiles3 <- paste("file", 1:5, sep = "_")
allfiles1
## [1] "file_ 1" "file_ 2" "file_ 3" "file_ 4" "file_ 5"
allfiles2
## [1] "file_ 1_file_ 2_file_ 3_file_ 4_file_ 5"
allfiles3
## [1] "file_1" "file_2" "file_3" "file_4" "file_5"

A factor object is used to store categorical / qualitative variables.

grade <- factor(c("A", "C", "B", "B-", "A", "C+", "D", "A-", "B+", "C-", "B"))
grade
##  [1] A  C  B  B- A  C+ D  A- B+ C- B 
## Levels: A A- B B- B+ C C- C+ D
gender <- c("M", "F", "F", "M", "M", "M", "F", "M", "F")
gender <- as.factor(gender)
class(gender)
## [1] "factor"
levels(gender) # use levels() to find all categories in the variable
## [1] "F" "M"
length(grade) # use length() to find the length of vectors 
## [1] 11

A numeric object is used to store numeric data in R.

x1 <- 3
x2 <- c(-3.13, 2.47, 6, -1.5, 4.29, 2.72, 1, 0, 3.85)
class(x1)
## [1] "numeric"
class(x2)
## [1] "numeric"
sum(x2)
## [1] 15.7
max(x2)
## [1] 6
min(x2)
## [1] -3.13
range(x2)
## [1] -3.13  6.00
round(x2) # round off the values
## [1] -3  2  6 -2  4  3  1  0  4
ceiling(x2) # round up to the nearest integer
## [1] -3  3  6 -1  5  3  1  0  4
floor(x2) # round down to the nearest integer
## [1] -4  2  6 -2  4  2  1  0  3
summary(x2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -3.130   0.000   2.470   1.744   3.850   6.000

An integer object is used to store numeric data without decimals.

x2 <- c(-3.13, 2.47, 6, -1.5, 4.29, 2.72, 1, 0, 3.85)
x3 <- as.integer(x2) # keep only the integer part (decimals are truncated toward zero)
x3
## [1] -3  2  6 -1  4  2  1  0  3
class(x3)
## [1] "integer"

A logical object contains only two values: TRUE or FALSE.

y1 <- -7
y2 <- 11
y1 > y2
## [1] FALSE
y1 == y2  # check if two objects are the same
## [1] FALSE
y1 <= y2
## [1] TRUE
result <- y1 > y2
class(result)
## [1] "logical"

A complex object is used to store complex values.

sqrt(-1)
## [1] NaN
1*i
## Error in eval(expr, envir, enclos): object 'i' not found
1i
## [1] 0+1i
x <- 3+2i
y <- -7i
class(x)
## [1] "complex"
x+y
## [1] 3-5i

There are some situations in which we may want to create an empty vector. Here is a simple example.

x <- c()
y1 <- vector("character", length=3)
y2 <- character(3)
z1 <- vector("numeric", 5)
z2 <- numeric(5)
w <- rep(NA, 2)
x
## NULL
y1
## [1] "" "" ""
y2
## [1] "" "" ""
z1
## [1] 0 0 0 0 0
z2
## [1] 0 0 0 0 0
w
## [1] NA NA

We can use the as.*() family of functions, such as as.integer(), as.character(), and as.numeric(), to convert between data types.

z1 <- as.integer(c(3, 5))
class(z1)
## [1] "integer"
z2 <- as.character(z1) # transform integer object to character
z2
## [1] "3" "5"
as.numeric(3<8)  # transform logical object to numeric values
## [1] 1
as.numeric(3>8)
## [1] 0
gender <- factor(c("M", "F", "F", "M", "M", "M", "F", "M", "F"))
as.numeric(gender) # transform levels in the categorical variables to numbers
## [1] 2 1 1 2 2 2 1 2 1

Note: the as.numeric() function can transform a logical object to numeric values: TRUE becomes 1 and FALSE becomes 0.

More about String & Vector

In this section, we introduce some useful commands regarding strings and vectors.

toupper() and tolower() functions change the case of characters of a string.

a <- "Hello, the World!"
toupper(a)
## [1] "HELLO, THE WORLD!"
tolower(a)
## [1] "hello, the world!"
b <- "Good to see you!"
c(a, b) # combine the two strings into a vector
## [1] "Hello, the World!" "Good to see you!"
toupper(c(a,b))
## [1] "HELLO, THE WORLD!" "GOOD TO SEE YOU!"

The substring() function is used to obtain part of a string.

Usage: substring(x, first, last)

substring(a, 8, 10) # extract the characters from the 8th to the 10th position
## [1] "the"

The print() function prints its argument and returns it invisibly.

print("Hello")
## [1] "Hello"
print(1)
## [1] 1
print(1, 3, 6)
## [1] 1
print(c(1, 3, 6))
## [1] 1 3 6
print(c("Hello!", c(1,3,6)))
## [1] "Hello!" "1"      "3"      "6"
print(c("Hello!", 1,3,6))
## [1] "Hello!" "1"      "3"      "6"

Note: If we want to print several objects together, we need to combine them first.

grep() finds a pattern in a string and returns the indices (positions) of the matches.

Usage: grep(pattern, string, value=FALSE)

x <- c("xyz", "xyz", "yxz", "yzx", "zxy", "zyx")
grep("xy", x, value=TRUE) # return values
## [1] "xyz" "xyz" "zxy"
grep("xy", x) # default: value = FALSE, it returns the indices (positions)
## [1] 1 2 5

gsub() finds a pattern in a string and replaces every occurrence of the pattern with the replacement.

Usage: gsub(pattern, replacement, string)

grade
##  [1] A  C  B  B- A  C+ D  A- B+ C- B 
## Levels: A A- B B- B+ C C- C+ D
gsub("A", 90, grade)
##  [1] "90"  "C"   "B"   "B-"  "90"  "C+"  "D"   "90-" "B+"  "C-"  "B"
gender
## [1] M F F M M M F M F
## Levels: F M
gsub("F", "Girl", gender)
## [1] "M"    "Girl" "Girl" "M"    "M"    "M"    "Girl" "M"    "Girl"
gsub("M", "Boy", gender)
## [1] "Boy" "F"   "F"   "Boy" "Boy" "Boy" "F"   "Boy" "F"
x <- "abcddefdae"
gsub("ab", "AB", x)
## [1] "ABcddefdae"

Session 2: Basic Data Structures, Managing Data, Installing and Loading Packages, and Importing Data and Writing Files

Brief Overview

In this session, we will introduce two basic data structures: matrix and data frame, installing and loading packages, and importing data and writing files.

Matrix

A matrix is a rectangular array of numbers or other mathematical objects for which operations such as addition and multiplication are defined (Matrix).

For example,

\[M=\left(\begin{array}{cccc} a_{11} & a_{12} & \ldots & a_{1n}\\ a_{21} & a_{22} & \ldots & a_{2n}\\ \vdots & \vdots & \ddots &\vdots\\ a_{m1} & a_{m2} & \ldots & a_{mn} \end{array}\right)\]

is called a \(m\times n\) matrix (\(m\) rows and \(n\) columns) where \(m\times n\) is the dimension of \(M\). In addition, \(M[i,j]\) is the element in the \(i^{th}\) row and the \(j^{th}\) column.

Usage: matrix(data = NA, nrow = 1, ncol = 1, byrow = FALSE, dimnames = NULL)

We can define matrices directly with numbers assigned in each element.

# Example 1 
A <- matrix(7:12, ncol=2)
B <- matrix(1:6, nrow=2, ncol=3, byrow=T)
A
##      [,1] [,2]
## [1,]    7   10
## [2,]    8   11
## [3,]    9   12
B
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
# Example 2
C <- matrix(nrow=2,ncol=3)
C[1,1] <- 1
C[1,2] <- 3
C[1,3] <- 5
C[2,1] <- 4
C[2,2] <- 7
C[2,3] <- 9
C
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    4    7    9

In the following, we show how to extract rows or columns in a matrix.

z <- matrix(1:12,3,4)
z[1, ] # the first row is extracted 
## [1]  1  4  7 10
z[ ,2] # the second column is extracted
## [1] 4 5 6
z[1,2:3] # the elements on the first row and on either second or third column are extracted. 
## [1] 4 7

Then, the following code chunk shows how to exclude rows or columns in a matrix.

z[,-c(2,4)] # the second and fourth columns are excluded
##      [,1] [,2]
## [1,]    1    7
## [2,]    2    8
## [3,]    3    9
z[-3,] # the third row is excluded
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
z[-1,-5]
##      [,1] [,2] [,3] [,4]
## [1,]    2    5    8   11
## [2,]    3    6    9   12

There are two multiplication operators for matrices in R.

The first operator, *, performs a simple element-by-element multiplication of two matrices with the same dimensions. The second operator, %*%, performs matrix multiplication between two conformable matrices.

# Example (Matrix addition, Matrix multiplication)
x <- matrix(1:4,2,2)
x + x
##      [,1] [,2]
## [1,]    2    6
## [2,]    4    8
x * x
##      [,1] [,2]
## [1,]    1    9
## [2,]    4   16
x %*% x
##      [,1] [,2]
## [1,]    7   15
## [2,]   10   22

One should note that vector and matrix are different data types in R, though a vector can be seen as a special case of a matrix.

We can use dim() function in R to check the dimension of a matrix or data frame (we will talk about it soon).

a <- 1:3
b <- as.matrix(a) # use as.matrix() to transform a vector to a matrix
dim(a) # check the dimension of a
## NULL
dim(b)
## [1] 3 1

Data Frame

In general, datasets in R are stored as data frames. The structure of a data frame is similar to that of a matrix, but all columns of a matrix must have the same data type, while different columns of a data frame can have different data types. A data frame also has column and row names.

Usage: data.frame(…, row.names = NULL, check.rows = FALSE, check.names = TRUE, fix.empty.names = TRUE, stringsAsFactors = FALSE)

# Creating Data Frames
names <- c('David', 'John', 'Mary')
quiz.1 <- c(89, 93, 85)
quiz.2 <- c(91, 88, 90)
Grade <- data.frame(names, quiz.1, quiz.2, stringsAsFactors = TRUE)
Grade
##   names quiz.1 quiz.2
## 1 David     89     91
## 2  John     93     88
## 3  Mary     85     90
str(Grade) # use str() function to know the structure of the data frame
## 'data.frame':    3 obs. of  3 variables:
##  $ names : Factor w/ 3 levels "David","John",..: 1 2 3
##  $ quiz.1: num  89 93 85
##  $ quiz.2: num  91 88 90

We can use either the $, [, or [[ operator to access columns of a data frame. Here is a simple example.

# extract the first variable
Grade$names
## [1] David John  Mary 
## Levels: David John Mary
Grade[,1]
## [1] David John  Mary 
## Levels: David John Mary
Grade[[1]]
## [1] David John  Mary 
## Levels: David John Mary

We can use colnames() function to obtain all column names of the data.

colnames(Grade)
## [1] "names"  "quiz.1" "quiz.2"
colnames(Grade) <- c("names", "Quiz_1", "Quiz_2")
colnames(Grade)
## [1] "names"  "Quiz_1" "Quiz_2"

Elements in data frames can be obtained like a matrix by providing index for row and column.

Grade[3,2] # the third student's first quiz grade
## [1] 85
Grade$names[3] # the third student's name
## [1] Mary
## Levels: David John Mary
Grade[1:2,3] # the first two students' second quiz grade
## [1] 91 88
Grade[2,2:3] # the second student's first and second quiz grades
##   Quiz_1 Quiz_2
## 2     93     88

We can use the merge() function to combine two data frames by matching the values of specified columns (by.x in the first data frame and by.y in the second).

Usage: merge(x, y, by.x, by.y)

Grade1 <- data.frame(students = c('David', 'Gabby', 'Mary'), 
                    quiz_3=c(88, 92, 85), 
                    stringsAsFactors=TRUE)
Grade1
##   students quiz_3
## 1    David     88
## 2    Gabby     92
## 3     Mary     85
merge(Grade, Grade1, by.x="names", by.y="students")
##   names Quiz_1 Quiz_2 quiz_3
## 1 David     89     91     88
## 2  Mary     85     90     85

Note: the result of merging two data frames returns only the rows found in both x and y. If we would like to keep all rows of the first data frame (x), we need the additional argument all.x=TRUE.

merge(Grade, Grade1, by.x="names", by.y="students", all.x=TRUE)
##   names Quiz_1 Quiz_2 quiz_3
## 1 David     89     91     88
## 2  John     93     88     NA
## 3  Mary     85     90     85

When we include the rows with no matching rows in the other data frame, NA will be assigned to the corresponding positions.

Installing and Loading Packages

Packages are the units that people commonly use to share code in R. In general, a package contains code, data, documentation, tests, etc. Most people upload their packages to CRAN, the Comprehensive R Archive Network, while a few people share their code on GitHub or other websites. It is recommended that you ONLY download packages from CRAN since these packages are well maintained.

In order to import packages in RStudio, you need to

  1. know the name of the package.

  2. download the package. Here, we introduce two basic methods:

  – Click the Packages tab in RStudio (bottom right window) and then click Install, find Install From: and select Repository (CRAN), type the name of the package in the box under Packages (separate multiple with space or comma) and click Install.

Note: we should leave Install dependencies checked so R will download any additional packages needed in order to use some functions or data in the package you are currently downloading.

  – In the Console window, run install.packages("package's name").

Note: It is essential to put the quotation marks around the package’s name.

  3. Use the library() or require() function to import the package you would like to use. Here, we show how to install the package tidyverse, which is designed for data science, and how to import it.
install.packages("tidyverse")
library(tidyverse)

Note:

  1. Sometimes, warning messages are given in the Console when installing certain packages, indicating that the package was built under an older version of R. In general, these warnings can be ignored since such packages are usually still compatible with newer versions of R.

  2. You only need to install a package once, the first time you need it. After that, you can simply import the package whenever you need it.

  3. The main difference between the library() and require() functions is that library() returns an error if the package doesn't exist, while require() returns FALSE and gives a warning.
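The following small sketch illustrates this difference; the package name notapackage is made up and assumed not to be installed:

# library(notapackage) # would stop with an error: there is no package called 'notapackage'
ok <- require(notapackage) # returns FALSE (with a warning that the package is not found)
ok
## [1] FALSE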

Importing Data and Writing Files

In this section, we introduce two methods of importing data from some commonly used formats and writing files.

  1. Using the Import Dataset tab in RStudio (top right window).

  2. Using code.

Since there are many file types, we will focus on two commonly used file types: text files and comma separated value files. We will use the package readr which is included in tidyverse as it provides a fast and convenient way to read rectangular data (e.g. csv, tsv, and fwf).

readr supports several common rectangular file types; for example, read_csv() reads comma-separated files, read_tsv() reads tab-separated files, read_delim() reads files with a general delimiter, and read_fwf() reads fixed-width files.

Some common arguments in these functions:

  - file: can be either a path to a file, a connection, or literal data
  - col_names: can be either TRUE, FALSE, or a character vector of column names

In general, these functions work well. We provide the path to a file, and we obtain a tibble, which is a modern reimagining of the data frame. It is much easier to navigate, view, and manipulate data stored in a tibble, as every row corresponds to an observation and every column corresponds to a variable.

The following two code chunks give examples of reading data files. The first data file can be downloaded here: bike_sharing_data.csv.

library(tidyverse)
df1 <- read_csv("../data/bike_sharing_data.csv")
head(df1) # use head() to read the first six rows of the data
## # A tibble: 6 x 12
##   datetime season holiday workingday weather  temp atemp humidity windspeed
##   <chr>     <dbl>   <dbl>      <dbl>   <dbl> <dbl> <dbl>    <dbl>     <dbl>
## 1 1/1/201~      1       0          0       1  9.84  14.4       81      0   
## 2 1/1/201~      1       0          0       1  9.02  13.6       80      0   
## 3 1/1/201~      1       0          0       1  9.02  13.6       80      0   
## 4 1/1/201~      1       0          0       1  9.84  14.4       75      0   
## 5 1/1/201~      1       0          0       1  9.84  14.4       75      0   
## 6 1/1/201~      1       0          0       2  9.84  12.9       75      6.00
## # ... with 3 more variables: casual <dbl>, registered <dbl>, count <dbl>
glimpse(df1) # use glimpse() to get a glimpse of the data
## Rows: 17,379
## Columns: 12
## $ datetime   <chr> "1/1/2011 0:00", "1/1/2011 1:00", "1/1/2011 2:00", "1/1/...
## $ season     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ holiday    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ workingday <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ weather    <dbl> 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3,...
## $ temp       <dbl> 9.84, 9.02, 9.02, 9.84, 9.84, 9.84, 9.02, 8.20, 9.84, 13...
## $ atemp      <dbl> 14.395, 13.635, 13.635, 14.395, 14.395, 12.880, 13.635, ...
## $ humidity   <dbl> 81, 80, 80, 75, 75, 75, 80, 86, 75, 76, 76, 81, 77, 72, ...
## $ windspeed  <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 6.0032, 0.0000, ...
## $ casual     <dbl> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41...
## $ registered <dbl> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70...
## $ count      <dbl> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, ...

Note: glimpse() is a function included in tidyverse.

The second data file: Iris.Data can be obtained from UCI Machine Learning Repository.

The dataset contains 3 classes of 50 instances each, where each class refers to a type of iris plant. It has 5 variables:

  1. sepal length in cm
  2. sepal width in cm
  3. petal length in cm
  4. petal width in cm
  5. class: Iris Setosa, Iris Versicolour, Iris Virginica
df2 <- read_delim("../data/iris.Data", delim=",", col_names = c("sepal_length", "sepal_width", "petal_length", "petal_width", "class"))
glimpse(df2)
## Rows: 150
## Columns: 5
## $ sepal_length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4,...
## $ sepal_width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7,...
## $ petal_length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5,...
## $ petal_width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2,...
## $ class        <chr> "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-set...

Note: In many programming languages, such as C, C++, Java, MATLAB, Python, Perl, and R, the backslash, \, works as an escape character in strings. So in these languages, we need to use either a forward slash, /, or a double backslash, \\, in a string in order to get a single backslash in a path.
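A small sketch of the idea, using a hypothetical Windows path:

# read_csv("C:\Users\me\data.csv") # single backslashes are treated as escapes and typically cause an error
path <- "C:\\Users\\me\\data.csv"  # double backslashes stand for single backslashes
cat(path)
## C:\Users\me\data.csv
path2 <- "C:/Users/me/data.csv"    # forward slashes also work, even on Windows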

Similarly, readr provides corresponding functions to write files, such as write_csv(), write_tsv(), and write_delim(). Their main arguments are the data frame to write and the path of the output file.

write_csv(df1, "../data/newfile.csv")

Session 3: Basic Graphical Displays

Brief Overview

In this session, we will use the diamonds data, which contains the prices of over 50,000 round-cut diamonds, to study how to make the following graphical displays. This data is included in tidyverse.

Here is a list of common arguments used in the base plotting functions below: main (the plot title), xlab and ylab (the axis labels), col (the colors), and cex.lab / cex.axis (the sizes of the axis labels and tick labels).

Bar Chart

A bar chart is a graphical display that works well for a general audience. Here we study the distribution of the quality of the cut in the data.

Usage: barplot(height, …)

barplot(table(diamonds$cut))

barplot(table(diamonds$cut), col="blue", main="Distribution of Diamond Cut", horiz=TRUE, xlab="Number of Diamonds")

Note:

  1. One can use the names argument to change the names appearing under each bar. For example, names=c("F", "G", "VG", "P", "I").

  2. We can use RGB color code to assign colors.

barplot(table(diamonds$cut), col="#69b3a2", main="Distribution of Diamond Cut", xlab="Number of Diamonds", names=c("F", "G", "VG", "P", "I"))

Pie Chart

Similarly, we can use a pie chart to study the distribution of the diamond color, from D (best) to J (worst).

pie(table(diamonds$color), main="Distribution of Diamond Color")

The following code chunk shows an advanced setting.

H <- table(diamonds$color)
percent <- round(100*H/sum(H), 1) # calculate percentages
pie_labels <- paste(percent, "%", sep="") # include %
pie(H, main="My Best Piechart", labels=pie_labels, col = 2:8)
legend("topright", c("D","E","F","G","H","I","J"), cex=0.8, fill=2:8)

Tip: Use color palette to choose colors.

Histogram

A histogram is used when we want to study the distribution of a quantitative variable. Here we study the distribution of the prices in the data.

hist(diamonds$price, main="Distribution of Price of Diamonds", xlab="Price")

We can find that the distribution of the price is unimodal (one peak), skewed to the right, and has no outliers.

Boxplot

Here we talk about another graphical display that can be used to study the distribution of a quantitative variable: box and whisker plot (boxplot).

boxplot(diamonds$price, xlab="Price", ylab="Dollars")

In general, a boxplot is used when we want to compare the distributions of several quantitative variables. In the following, we study the distribution of diamond prices across the different qualities of the cut.

In order to know how this can be done, we need to know how to define a formula in R.

Usage: A ~ B

boxplot(diamonds$price~diamonds$cut)

boxplot(diamonds$price ~ diamonds$cut, main="Distribution of Price of Diamonds among the Quality of the Cut", xlab="Quality", ylab="Price", col=11:15, cex.lab=1.25, cex.axis=1.25)

We can use the argument data to indicate that variables used are from a given data.

boxplot(price ~ cut + color, data = diamonds, main="Distribution of Price of Diamonds among the Quality of the Cut", xlab="Quality", ylab="Price",  cex.lab=1.25, cex.axis=1.25)

The above plot is only for demonstration purposes. We can see that not all category names are shown on the plot, which should be improved.

Scatterplot

When we want to study the relationship between two quantitative variables, a scatterplot can be used. Here we study the relationship between a diamond's price and its weight (carat).

plot(price ~ carat, data=diamonds, xlab="Weight of Diamond", ylab="Price of Diamond")

Line Plot

When we want to show how a quantitative variable changes over a period of time, a line plot can be used. Line plots can also be used to compare changes over the same period of time for several groups. Since the diamonds dataset is not time series data, it is not appropriate for a line plot. In the following code chunk, we create a data frame using the forecasted highest temperatures from July 13 to July 22 (The Weather Channel).

In order to graph a line plot, we need two additional arguments, type (for example, type="o" draws points connected by lines) and ylim (the range of the y-axis), together with the lines() function to add more series to an existing plot.

Date <- 13:22
Dayton_OH <- c(84, 86, 91, 89, 89, 91, 92, 91, 91, 91)
Houston_TX <- c(100, 97, 96, 94, 94, 94, 93, 93, 92, 91)
Denver_CO <- c(95, 85, 89, 96, 97, 96, 92, 91, 95, 96)
Fargo_ND <- c(86, 80, 84, 87, 90, 87, 83, 84, 87, 89)
df <- data.frame(Date, Dayton_OH, Houston_TX, Denver_CO, Fargo_ND)

plot(Date, Dayton_OH, type="o", col="blue", xlab="Date in July", ylab="Highest Temperature", ylim=c(80, 100))
lines(Date, Houston_TX, type="o", col="red")
lines(Date, Denver_CO, type="o", col="purple")
lines(Date, Fargo_ND, type="o", col="darkgreen")

Session 4: Data Manipulation

Brief Overview

In this session, we will talk about data manipulation using the R package tidyverse. This package contains a collection of R packages that help us do data management and exploration. The key packages in tidyverse include ggplot2, dplyr, tidyr, readr, purrr, tibble, stringr, and forcats.

In this session, we will focus on the following key functions in dplyr using the bike_sharing_data.csv.

Pipe

In order to handle data processing well in data science, it is essential to know how to use pipes. Pipes are a great tool for presenting a sequence of multiple operations and, therefore, increase the readability of the code. The pipe, %>%, comes from the package magrittr and is loaded automatically when tidyverse is loaded.

The logic when using a pipe is: object %>% function1 %>% function2 ….
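For example, the following small sketch (assuming tidyverse, and hence the pipe, is already loaded) computes the same result with nested calls and with pipes:

x <- c(1, 4, 7, 9, 19)
round(sqrt(mean(x)), 2)               # nested calls are read from the inside out
## [1] 2.83
x %>% mean() %>% sqrt() %>% round(2)  # the piped version reads from left to right
## [1] 2.83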

First, we load the packages, check for conflicting functions, and import bike_sharing_data.csv.

library(tidyverse)
library(conflicted)
conflict_prefer("select", "dplyr")
conflict_prefer("filter", "dplyr")
df <- read_csv("../data/bike_sharing_data.csv")

Now we need to understand each variable before we move on. In this data, we have 12 variables (UCI Machine Learning Repository).

arrange()

arrange() is used when we want to sort a dataset by a variable. If more variables are specified for sorting, the variables entered first take priority over those entered later. The following code chunk gives an example that sorts the bike sharing data by temperature and humidity so we can compare the distribution of count in the data.

df1 <- df %>% arrange(temp, humidity)
df1[1:10,c("datetime","temp", "humidity", "count")] # print the first 10 rows to check the result
## # A tibble: 10 x 4
##    datetime        temp humidity count
##    <chr>          <dbl>    <dbl> <dbl>
##  1 1/4/2012 2:00   0.82       34     1
##  2 1/4/2012 3:00   0.82       34     1
##  3 1/4/2012 4:00   0.82       41     2
##  4 1/4/2012 5:00   0.82       41    14
##  5 1/4/2012 6:00   0.82       41    59
##  6 1/22/2011 6:00  0.82       44     4
##  7 1/22/2011 7:00  0.82       44    13
##  8 1/22/2011 8:00  0.82       44    28
##  9 1/4/2012 7:00   0.82       44   152
## 10 1/4/2012 8:00   0.82       44   315

Some potential questions we may be interested in: is there any association between the bike sharing count and the temperature, the humidity, or both?

Note:

  1. We can assign the sorted data back to the original data frame after sorting.
  2. Use desc() to sort the data in descending order. For example, df %>% arrange(temp, desc(humidity)).

filter()

filter() is used when we want to keep only the rows that satisfy a logical condition on one or more columns. If we would like to study how popular the bike sharing program (count) is during holidays, we can use filter() to extract the data for holidays.

table(df$holiday)
## 
##     0     1 
## 16879   500
df2 <- df %>% filter(holiday==1)
df2$count[1:10]
##  [1] 17 16  8  2  3  1  5 13 33 47
summary(df2$count) # find the summary statistics for count for holidays 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   34.75  120.00  168.37  265.25  712.00
summary(df$count) # the summary statistics for the original data
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1      42     141     187     277     977

What did you find by comparing the summary statistics between holidays’ data and the original data?

Here is a more advanced example that shows how we can extract the data for holidays in season 2 where the temperature is higher than 20 degrees Celsius.

df3 <- df %>% filter(holiday==1, season==2, temp>20)
dim(df3)
## [1] 81 12
summary(df3$count)  
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     6.0    84.0   243.0   229.2   337.0   712.0
summary(df2$count) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   34.75  120.00  168.37  265.25  712.00

Or we can extract the data for holidays in season 2 or season 3 where the temperature is higher than 20 degrees Celsius.

df4 <- df %>% filter(holiday==1, season==2|season==3, temp>20) 
dim(df4)
## [1] 177  12
summary(df4$count)  
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0    60.0   229.0   233.9   375.0   712.0
summary(df3$count) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     6.0    84.0   243.0   229.2   337.0   712.0

group_by() & summarize()

group_by() is used to group rows by one or more variables, giving priority to the variable entered first. For example, if we would like to study the weather effect on the bike sharing program, we can group rows by weather categories.

table(df$weather)
## 
##     1     2     3     4 
## 11413  4544  1419     3
df5 <- df %>% group_by(weather)
df5
## # A tibble: 17,379 x 12
## # Groups:   weather [4]
##    datetime season holiday workingday weather  temp atemp humidity windspeed
##    <chr>     <dbl>   <dbl>      <dbl>   <dbl> <dbl> <dbl>    <dbl>     <dbl>
##  1 1/1/201~      1       0          0       1  9.84  14.4       81      0   
##  2 1/1/201~      1       0          0       1  9.02  13.6       80      0   
##  3 1/1/201~      1       0          0       1  9.02  13.6       80      0   
##  4 1/1/201~      1       0          0       1  9.84  14.4       75      0   
##  5 1/1/201~      1       0          0       1  9.84  14.4       75      0   
##  6 1/1/201~      1       0          0       2  9.84  12.9       75      6.00
##  7 1/1/201~      1       0          0       1  9.02  13.6       80      0   
##  8 1/1/201~      1       0          0       1  8.2   12.9       86      0   
##  9 1/1/201~      1       0          0       1  9.84  14.4       75      0   
## 10 1/1/201~      1       0          0       1 13.1   17.4       76      0   
## # ... with 17,369 more rows, and 3 more variables: casual <dbl>,
## #   registered <dbl>, count <dbl>

The result shows the original data but indicates the grouping variable, weather in our example. In general, the summarize() function is used together with group_by(), as we group rows for some purpose. For example, we can study the average measures (temp, atemp, humidity, windspeed) of the quantitative variables under different weather conditions.

df6 <- df %>% group_by(weather) %>% summarize(ave_temp = mean(temp), 
                                              ave_atemp = mean(atemp),
                                              ave_humidity = mean(humidity),
                                              ave_windspeed = mean(windspeed),
                                              cases = n())

df6
## # A tibble: 4 x 6
##   weather ave_temp ave_atemp ave_humidity ave_windspeed cases
##     <dbl>    <dbl>     <dbl>        <dbl>         <dbl> <int>
## 1       1    21.0      24.4          57.4          12.8 11413
## 2       2    19.5      22.8          69.9          12.1  4544
## 3       3    18.7      21.8          82.8          14.7  1419
## 4       4     7.65      9.35         88.3          13.7     3

mutate()

mutate() is used when we would like to add a new variable / column computed from the other variables in the data. The following code chunk shows how we convert the temperature (temp) and the feeling temperature (atemp) in the data from Celsius to Fahrenheit using the equation \[F = \frac{9}{5}\times C+32\] and add the two new columns to the dataset.

df7 <- df %>% mutate(F_temp=9/5*temp+32, F_atemp=9/5*atemp+32)
glimpse(df7)
## Rows: 17,379
## Columns: 14
## $ datetime   <chr> "1/1/2011 0:00", "1/1/2011 1:00", "1/1/2011 2:00", "1/1/...
## $ season     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ holiday    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ workingday <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ weather    <dbl> 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3,...
## $ temp       <dbl> 9.84, 9.02, 9.02, 9.84, 9.84, 9.84, 9.02, 8.20, 9.84, 13...
## $ atemp      <dbl> 14.395, 13.635, 13.635, 14.395, 14.395, 12.880, 13.635, ...
## $ humidity   <dbl> 81, 80, 80, 75, 75, 75, 80, 86, 75, 76, 76, 81, 77, 72, ...
## $ windspeed  <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 6.0032, 0.0000, ...
## $ casual     <dbl> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41...
## $ registered <dbl> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70...
## $ count      <dbl> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, ...
## $ F_temp     <dbl> 49.712, 48.236, 48.236, 49.712, 49.712, 49.712, 48.236, ...
## $ F_atemp    <dbl> 57.911, 56.543, 56.543, 57.911, 57.911, 55.184, 56.543, ...

We also can use a logical statement to add a new variable in the data.

df8 <- df7 %>% mutate(temp_level=ifelse(F_temp>85, "high", "low"))
glimpse(df8)
## Rows: 17,379
## Columns: 15
## $ datetime   <chr> "1/1/2011 0:00", "1/1/2011 1:00", "1/1/2011 2:00", "1/1/...
## $ season     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ holiday    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ workingday <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ weather    <dbl> 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3,...
## $ temp       <dbl> 9.84, 9.02, 9.02, 9.84, 9.84, 9.84, 9.02, 8.20, 9.84, 13...
## $ atemp      <dbl> 14.395, 13.635, 13.635, 14.395, 14.395, 12.880, 13.635, ...
## $ humidity   <dbl> 81, 80, 80, 75, 75, 75, 80, 86, 75, 76, 76, 81, 77, 72, ...
## $ windspeed  <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 6.0032, 0.0000, ...
## $ casual     <dbl> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41...
## $ registered <dbl> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70...
## $ count      <dbl> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, ...
## $ F_temp     <dbl> 49.712, 48.236, 48.236, 49.712, 49.712, 49.712, 48.236, ...
## $ F_atemp    <dbl> 57.911, 56.543, 56.543, 57.911, 57.911, 55.184, 56.543, ...
## $ temp_level <chr> "low", "low", "low", "low", "low", "low", "low", "low", ...
table(df8$temp_level)
## 
##  high   low 
##  2685 14694

select()

select() is used when we would like to keep only certain variables in the data. For example, if we would like to focus on studying the quantitative variables in the bike sharing data, we can use the select() function to keep all the quantitative variables.

df9 <- df %>% select(temp, atemp, humidity, windspeed, casual, registered, count)
glimpse(df9)
## Rows: 17,379
## Columns: 7
## $ temp       <dbl> 9.84, 9.02, 9.02, 9.84, 9.84, 9.84, 9.02, 8.20, 9.84, 13...
## $ atemp      <dbl> 14.395, 13.635, 13.635, 14.395, 14.395, 12.880, 13.635, ...
## $ humidity   <dbl> 81, 80, 80, 75, 75, 75, 80, 86, 75, 76, 76, 81, 77, 72, ...
## $ windspeed  <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 6.0032, 0.0000, ...
## $ casual     <dbl> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41...
## $ registered <dbl> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70...
## $ count      <dbl> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, ...

This action is equivalent to dropping the other variables in the data.

df10 <- df %>% select(-c(datetime, season, holiday, workingday, weather))
glimpse(df10)
## Rows: 17,379
## Columns: 7
## $ temp       <dbl> 9.84, 9.02, 9.02, 9.84, 9.84, 9.84, 9.02, 8.20, 9.84, 13...
## $ atemp      <dbl> 14.395, 13.635, 13.635, 14.395, 14.395, 12.880, 13.635, ...
## $ humidity   <dbl> 81, 80, 80, 75, 75, 75, 80, 86, 75, 76, 76, 81, 77, 72, ...
## $ windspeed  <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 6.0032, 0.0000, ...
## $ casual     <dbl> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41...
## $ registered <dbl> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70...
## $ count      <dbl> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, ...

Other Useful functions

In this subsection, we talk about other useful functions.

drop_na() function

drop_na() is used when we would like to drop rows containing missing values. Since the bike sharing data has no missing values, the R built-in data airquality is used here. First, we can use the sum() and is.na() functions together to check whether there are any missing values in the data.

sum(is.na(df)) # returns 0 if there are no missing values
## [1] 0
sum(is.na(airquality))   
## [1] 44

We can find that there are 44 missing values in the data. Now we can use drop_na() to drop rows containing missing values.

df_air <- airquality %>% drop_na()
sum(is.na(df_air))
## [1] 0

Mutate Multiple Columns

Here we introduce a family of mutate-related functions: mutate_all(), mutate_at(), and mutate_if().

The following code chunk shows how we can scale each variable (\(\frac{x-\bar{x}}{s_x}\), \(\bar{x}\) is the sample mean and \(s_x\) is the corresponding standard deviation) in the data airquality.

air_scale <- airquality %>% mutate_all(scale)

The following code chunk shows how we can transform the data types of season, holiday, workingday, and weather to factor in the bike sharing data.

glimpse(df) # check the original data structure again
## Rows: 17,379
## Columns: 12
## $ datetime   <chr> "1/1/2011 0:00", "1/1/2011 1:00", "1/1/2011 2:00", "1/1/...
## $ season     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ holiday    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ workingday <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ weather    <dbl> 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3,...
## $ temp       <dbl> 9.84, 9.02, 9.02, 9.84, 9.84, 9.84, 9.02, 8.20, 9.84, 13...
## $ atemp      <dbl> 14.395, 13.635, 13.635, 14.395, 14.395, 12.880, 13.635, ...
## $ humidity   <dbl> 81, 80, 80, 75, 75, 75, 80, 86, 75, 76, 76, 81, 77, 72, ...
## $ windspeed  <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 6.0032, 0.0000, ...
## $ casual     <dbl> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41...
## $ registered <dbl> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70...
## $ count      <dbl> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, ...
df11 <- df %>% mutate_at(c("season", "holiday", "workingday", "weather"), as.factor)
glimpse(df11) # check the new data structure 
## Rows: 17,379
## Columns: 12
## $ datetime   <chr> "1/1/2011 0:00", "1/1/2011 1:00", "1/1/2011 2:00", "1/1/...
## $ season     <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ holiday    <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ workingday <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ weather    <fct> 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3,...
## $ temp       <dbl> 9.84, 9.02, 9.02, 9.84, 9.84, 9.84, 9.02, 8.20, 9.84, 13...
## $ atemp      <dbl> 14.395, 13.635, 13.635, 14.395, 14.395, 12.880, 13.635, ...
## $ humidity   <dbl> 81, 80, 80, 75, 75, 75, 80, 86, 75, 76, 76, 81, 77, 72, ...
## $ windspeed  <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 6.0032, 0.0000, ...
## $ casual     <dbl> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41...
## $ registered <dbl> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70...
## $ count      <dbl> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, ...

The following code chunk shows how we can scale the quantitative variables in the bike sharing data and round values to the first decimal place.

df11 <- df11 %>% mutate_if(is.numeric, scale) %>% mutate_if(is.numeric, round, digits = 1)
df11
## # A tibble: 17,379 x 12
##    datetime season holiday workingday weather temp[,1] atemp[,1] humidity[,1]
##    <chr>    <fct>  <fct>   <fct>      <fct>      <dbl>     <dbl>        <dbl>
##  1 1/1/201~ 1      0       0          1           -1.3      -1.1          0.9
##  2 1/1/201~ 1      0       0          1           -1.4      -1.2          0.9
##  3 1/1/201~ 1      0       0          1           -1.4      -1.2          0.9
##  4 1/1/201~ 1      0       0          1           -1.3      -1.1          0.6
##  5 1/1/201~ 1      0       0          1           -1.3      -1.1          0.6
##  6 1/1/201~ 1      0       0          2           -1.3      -1.3          0.6
##  7 1/1/201~ 1      0       0          1           -1.4      -1.2          0.9
##  8 1/1/201~ 1      0       0          1           -1.5      -1.3          1.2
##  9 1/1/201~ 1      0       0          1           -1.3      -1.1          0.6
## 10 1/1/201~ 1      0       0          1           -0.9      -0.7          0.7
## # ... with 17,369 more rows, and 4 more variables: windspeed[,1] <dbl>,
## #   casual[,1] <dbl>, registered[,1] <dbl>, count[,1] <dbl>

Note: Why Scaling or Standardizing values?

  1. “How unusual is a value/observation?” The answer depends on the units of measurement.

  2. Variables measured at different scales don’t contribute equally to the analysis.
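As a small illustration of the standardization formula behind scale():

x <- c(2, 4, 6, 8, 10)
(x - mean(x)) / sd(x)  # z-scores computed by hand
## [1] -1.2649111 -0.6324555  0.0000000  0.6324555  1.2649111
as.numeric(scale(x))   # scale() returns the same standardized values
## [1] -1.2649111 -0.6324555  0.0000000  0.6324555  1.2649111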

Session 5: Data Exploration & Visualization

Brief Overview

In this session, we will introduce one data exploration package: DataExplorer and two data visualization packages: ggplot2 and plotly.

We will use the secondary data posted by The COVID Tracking Project, a volunteer group organized by The Atlantic that assembles COVID-19 data. The data contains daily summary COVID-19 information for the United States. First, we load the necessary packages and import the data.

library(tidyverse)
library(DataExplorer)

df_states <- read_csv("https://covidtracking.com/api/v1/states/daily.csv")
glimpse(df_states)
## Rows: 7,857
## Columns: 41
## $ date                     <dbl> 20200723, 20200723, 20200723, 20200723, 20...
## $ state                    <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO", ...
## $ positive                 <dbl> 2684, 74212, 36259, 0, 152944, 425616, 416...
## $ negative                 <dbl> 186825, 545315, 410221, 1037, 669769, 6352...
## $ pending                  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 26...
## $ hospitalizedCurrently    <dbl> 36, 1547, 480, NA, 2966, 8820, 351, 72, 91...
## $ hospitalizedCumulative   <dbl> NA, 8995, 2361, NA, 7236, NA, 6133, 10712,...
## $ inIcuCurrently           <dbl> NA, NA, NA, NA, 851, 2284, NA, NA, 22, 7, ...
## $ inIcuCumulative          <dbl> NA, 1043, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ onVentilatorCurrently    <dbl> 1, NA, 107, NA, 617, NA, NA, NA, 9, NA, NA...
## $ onVentilatorCumulative   <dbl> NA, 553, 329, NA, NA, NA, NA, NA, NA, NA, ...
## $ recovered                <dbl> 787, 32510, 28864, NA, 19737, NA, 5095, 85...
## $ dataQualityGrade         <chr> "A", "B", "A+", "C", "A+", "B", "A", "B", ...
## $ lastUpdateEt             <chr> "7/23/2020 00:00", "7/23/2020 11:00", "7/2...
## $ dateModified             <dttm> 2020-07-23 00:00:00, 2020-07-23 11:00:00,...
## $ checkTimeEt              <chr> "07/22 20:00", "07/23 07:00", "07/23 10:46...
## $ death                    <dbl> 19, 1397, 386, 0, 3063, 8027, 1643, 4410, ...
## $ hospitalized             <dbl> NA, 8995, 2361, NA, 7236, NA, 6133, 10712,...
## $ dateChecked              <dttm> 2020-07-23 00:00:00, 2020-07-23 11:00:00,...
## $ totalTestsViral          <dbl> 189509, 618011, 445467, NA, 822713, 677830...
## $ positiveTestsViral       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 48...
## $ negativeTestsViral       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 35...
## $ positiveCasesViral       <dbl> 2684, 72696, 36259, 0, 137710, 425616, 385...
## $ deathConfirmed           <dbl> 19, 1357, NA, NA, 2431, NA, NA, 3530, NA, ...
## $ deathProbable            <dbl> NA, 40, NA, NA, 152, NA, NA, 880, NA, 58, ...
## $ fips                     <chr> "02", "01", "05", "60", "04", "06", "08", ...
## $ positiveIncrease         <dbl> 65, 2399, 1013, 0, 2335, 12040, 639, 9, 42...
## $ negativeIncrease         <dbl> 4111, 7640, 5241, 0, 6397, 101845, 7370, 1...
## $ total                    <dbl> 189509, 619527, 446480, 1037, 822713, 6778...
## $ totalTestResults         <dbl> 189509, 619527, 446480, 1037, 822713, 6778...
## $ totalTestResultsIncrease <dbl> 4176, 10039, 6254, 0, 8732, 113885, 8009, ...
## $ posNeg                   <dbl> 189509, 619527, 446480, 1037, 822713, 6778...
## $ deathIncrease            <dbl> 0, 33, 6, 0, 89, 157, 0, 4, 1, 2, 173, 25,...
## $ hospitalizedIncrease     <dbl> 0, 457, 44, 0, 189, 0, 23, 58, 0, 0, 403, ...
## $ hash                     <chr> "250b2f86b7f497e40057c76c9c34280febdd150b"...
## $ commercialScore          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...

The dataset contains 7857 observations of 41 variables up to Thu Jul 23 19:49:04 2020.

Data Exploration with R Package: DataExplorer

In data science, it is important to get to know your data before advanced modeling or further analysis. We should understand what the data are about, what variables we have, the size of the data, how many missing values there are, the data type of each variable, possible relationships between variables, and anything unusual or interesting in the data.

First, we check the basic description for the COVID-19 data using the function plot_intro() in the package DataExplorer.

plot_intro(df_states)

Then, we study the distribution of missing values in the COVID-19 data using the function plot_missing() in the package DataExplorer.

plot_missing(df_states)

Since there are 41 variables and some of the information is not of interest here, we will focus on the following variables for the data exploration, as many of the other variables depend on some of these variables.

df_states <- df_states %>% select(c(date, state, totalTestResultsIncrease, positiveIncrease, negativeIncrease, deathIncrease, hospitalizedIncrease, death))
glimpse(df_states)
## Rows: 7,857
## Columns: 8
## $ date                     <dbl> 20200723, 20200723, 20200723, 20200723, 20...
## $ state                    <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO", ...
## $ totalTestResultsIncrease <dbl> 4176, 10039, 6254, 0, 8732, 113885, 8009, ...
## $ positiveIncrease         <dbl> 65, 2399, 1013, 0, 2335, 12040, 639, 9, 42...
## $ negativeIncrease         <dbl> 4111, 7640, 5241, 0, 6397, 101845, 7370, 1...
## $ deathIncrease            <dbl> 0, 33, 6, 0, 89, 157, 0, 4, 1, 2, 173, 25,...
## $ hospitalizedIncrease     <dbl> 0, 457, 44, 0, 189, 0, 23, 58, 0, 0, 403, ...
## $ death                    <dbl> 19, 1397, 386, 0, 3063, 8027, 1643, 4410, ...
df_states$date <- df_states$date %>% as.factor() # change the data type for date

Now, we study the frequency distribution of all categorical variables in the data using the function plot_bar() in the package DataExplorer.

plot_bar(df_states, maxcat=56)

Since we only have one categorical variable: state in the data, the above figure shows the frequency distribution of state in the COVID-19 data.

The following code shows the distribution of the sum of deathIncrease by state.

plot_bar(df_states, with="deathIncrease", maxcat=56)

Next, we study the distribution of all quantitative variables in the data using the function plot_histogram() in the package DataExplorer.

plot_histogram(df_states, ncol=3) 

We study the distributions of positiveIncrease and deathIncrease with respect to states individually using the function plot_boxplot() in the package DataExplorer.

plot_boxplot(df_states %>% select(c(state, positiveIncrease, deathIncrease)), by = "state")

We can study the association between any quantitative variable and a given response variable in the data using the function plot_scatterplot() in the package DataExplorer. Here, we study the association between death and the other quantitative variables in the COVID-19 data. In order to reduce the running time, we only sample 1000 rows from the data.

plot_scatterplot(df_states %>% drop_na(), by = "death", sampled_rows=1000)

Note: the variable date is not a quantitative variable.
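If we preferred to treat date as an actual date rather than a factor, one option (a small sketch, not part of the original analysis) is to convert the yyyymmdd number with as.Date():

# hypothetical sketch: parse the yyyymmdd value into a Date object
as.Date(as.character(20200723), format = "%Y%m%d")
## [1] "2020-07-23"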

plot_scatterplot(df_states %>% filter(state=="OH") %>% select(-c(state)) %>% drop_na(), by = "death", ncol=2)

The above figure only shows the association between death and other quantitative variables in the Ohio COVID-19 data.

We can check the correlation of all quantitative variables in the data using the function plot_correlation() in the package DataExplorer.

plot_correlation(df_states %>% select(-c(date,state)), cor_args = list( "use" = "complete.obs"))

If you are new to data exploration and have no idea where to start, the create_report() function in the package DataExplorer can help create a report for the data exploration of the data.

create_report(df_states %>% filter(!(state %in% c("DC", "AS", "GU", "MP", "PR", "VI"))), output_file = "report.html", output_dir = "I:/Shared drives/R Short Course 2020 Summer/code")

Note: Use help(“create_report”) to find the usage of create_report().

Data Visualization with R Package: ggplot2

While we can use the built-in functions in the base package in R to obtain plots, the package ggplot2 creates advanced graphs with simple and flexible commands.

We will continue using the secondary COVID-19 data to show the use of different graphical displays. First, we group the data by the variable state and compute summary information for each state.

df_DV <- df_states %>% group_by(state) %>% summarize(
  Positive = sum(positiveIncrease),
  Negative = sum(negativeIncrease), 
  Death = sum(deathIncrease),
  Hospitalized = sum(hospitalizedIncrease))
DT::datatable(df_DV)

Grammar of Graphics

The basic idea of creating plots with ggplot2 is to specify each component of the plot, such as the data, the aesthetic mappings (aes()), the geometric objects (geom_*() functions), the facets, and the theme, and then combine them with +.

Note:

  1. We only list some key components here.

  2. See Modify Components of A Theme and Complete Themes for more details about the use of theme.

ggplot() function

ggplot() function plays an important role in data visualization as it is very flexible for plotting many different types of graphic displays.

The logic when using ggplot() function is: ggplot(data, mapping) + geom_function().

First, we study the distribution of confirmed cases of COVID-19 in the United States up to 2020-07-23.

ggplot(data = df_DV, aes(x = Positive)) + geom_histogram() 

The following code chunk shows an improved version of the above histogram.

p1 <- ggplot(data = df_DV, aes(x = Positive)) + geom_histogram(fill="#20B2AA") + 
  labs(x = "Confirmed Cases", title = "Distribution of Confirmed Cases of COVID-19 in the United States") +
  theme(axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 12),  
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12))

p1

Then, we study the total number of confirmed cases in each state in the COVID-19 data.

options(scipen=10000) # put a high number so that R doesn't switch numbers to scientific notation

p2 <- ggplot(data = df_DV, aes(x = state, y = Positive)) + geom_bar(stat = "identity")

p2

Now we study the distribution of daily increased confirmed cases of each state using boxplots.

p3 <- ggplot(data = df_states, aes(x = state, y = positiveIncrease)) +
    geom_boxplot() + 
  labs(title = "Distribution of Daily Increased Confirmed Cases of Each State", y = "Confirmed Cases") + 
  theme(axis.title.x = element_text(size = 14),
        axis.title.y = element_text(size = 14))
p3

In addition, the relationship between the total number of deaths and the number of confirmed cases in each state is studied using a scatterplot.

p4 <- ggplot(data = df_DV, aes(x = Positive, y = Death)) + geom_point(col="#20B2AA") +
  labs(y = "Death in Each State", x = "Confirmed Cases of COVID-19 in Each State")

p4

We can see that the relationship between these two variables is roughly linear. If we would like to fit a simple linear model using these two variables, lm() can be used. The following code chunk shows how we can fit a simple linear model of the total number of deaths on the number of confirmed cases in each state and add the fitted line to the scatterplot.

\[\widehat{Death} = b_0 + b_1 \times Positive\]

df_DV$pred.death <- predict(lm(Death ~ Positive, data = df_DV)) # add the prediction from the linear model

p5 <- ggplot(data = df_DV, aes(x = Positive, y = Death)) + geom_point(col="#20B2AA")

p5 + geom_line(aes(y = pred.death))

Since we only have one categorical variable, state, in the data and this variable contains 56 categories, it is not ideal for showing the use of facets. Here, we use the diamonds dataset included in tidyverse to show how we can split one plot into several plots by a categorical variable.

ggplot(data = diamonds, aes(x = carat , y = price)) + geom_point() + facet_wrap(facets = vars(cut))

At the end of this section, we talk about how we can use the ggplot() function to obtain a map. In order to do this, we need another R package: maps.

The following steps are recommended when we would like to create a United States map displaying the corresponding information in the data:

  1. Obtain the longitude and latitude of each state using the map_data() function. You can use this function to obtain the longitude and latitude of each county in the United States as well.

  2. Add the columns from our data (df_DV) to the state map data using the left_join() function (a mutating join). We need to make sure that the names for each state are consistent in both datasets. (For example, our computers don't know that OH is the same as Ohio or ohio.)

  3. Use ggplot() and geom_polygon() functions to obtain a basic map first.

  4. Improve the map you obtain in the previous step.

library(maps)

# Retrieve the states map data 
state_map <- map_data("state")

# We need to match the state column in our data with the "region" column in state_map
df_DV <- df_DV %>% mutate(region = tolower(state.name[match(df_DV$state, state.abb)])) 

# merge state_map data with df_DV data
death_map <- left_join(state_map, df_DV, by = "region") 

# Create the map
p6 <- ggplot(death_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = Death), color = "white")
p6

In the following, we show an improved version of the above map.

state_location <- data.frame(state = state.abb, long = state.center$x, lat = state.center$y)
new_death_map <- left_join(state_location, df_DV, by = "state")

p7 <- ggplot(death_map, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = Death), color = "black") 

p7 + geom_text(aes(label = paste0(state, "\n ", Death)), data = new_death_map, color = "black", size = 3, fontface=3, hjust=0.5, vjust=0.5) +
  scale_fill_continuous(low ="#ade8f4", high = "#0077b6") +
  theme(legend.position="none", axis.text.x = element_blank(),
        axis.text.y = element_blank(),  
        axis.title.x = element_blank(),
        axis.title.y = element_blank())

# fontface allows values: 1(normal), 2(bold), 3(italic), 4(bold.italic)

Data Visualization with R Package: plotly

The R package plotly makes it very easy to create interactive graphic displays when we already know how to use ggplot() to create graphs.

The following code chunk shows the interactive plots corresponding to the figures we have created in the previous section.

library(plotly)
ggplotly(p1)
ggplotly(p2)
ggplotly(p3)
ggplotly(p4)
ggplotly(p5)
p8 <- ggplot(death_map, aes(x = long, y = lat, group = group, text = state)) +
  geom_polygon(aes(fill = Death), color = "white") + 
  labs(title="Distribution of COVID-19 Death in the United States")

ggplotly(p8, width=1440, height=900)

Session 6: Conditional Statements, Functions & Loops

Brief Overview

In this session, we will introduce the basic data type: list, the use of conditional statements, creation of functions, and loops.

List

When data contain several elements that have different lengths/dimensions or types, a list can be used to store the information. The following code chunk shows how we can create a list using the list() function.

states <- sample(state.abb, 5) # sample 5 states from the United States
names <- c('David', 'John', 'Mary')
quiz.1 <- c(89, 93, 85)
quiz.2 <- c(91, 88, 90)
Grade <- data.frame(names, quiz.1, quiz.2, stringsAsFactors = TRUE)
Y <- list(states, Grade, "Hello", 3)
Y
## [[1]]
## [1] "MD" "ID" "KY" "WV" "UT"
## 
## [[2]]
##   names quiz.1 quiz.2
## 1 David     89     91
## 2  John     93     88
## 3  Mary     85     90
## 
## [[3]]
## [1] "Hello"
## 
## [[4]]
## [1] 3

We can use names() to assign a name for each component in the list.

names(Y) <- c("State", "Grade", "Text", "Number")

Then we can obtain values in the list very easily.

Y$State
## [1] "MD" "ID" "KY" "WV" "UT"
Y[[1]] # obtain the object stored in the first component of the list
## [1] "MD" "ID" "KY" "WV" "UT"
Y$Grade$quiz.1 # this returns the quiz.1 grades in the second component of the list
## [1] 89 93 85
Y[[2]][,2]
## [1] 89 93 85
Y$Grade$quiz.2[3] # this returns the third person's (Mary) quiz 2 grade
## [1] 90
Y[[2]][3,3] # consistent with Y$Grade$quiz.2[3]
## [1] 90
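Note the difference between single and double brackets when indexing a list: single brackets return a sub-list, while double brackets return the object stored in that component. A quick sketch using the list Y defined above:

class(Y[1])   # "list": single brackets keep the list structure
class(Y[[1]]) # "character": double brackets extract the stored vector
str(Y, max.level = 1) # compact overview of the components of the list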

Conditional Statements

Conditional statements can be very helpful when we only want to execute commands under certain conditions.

if statement:

if (condition) {

  cmd

}

The following code chunk shows a simple example that checks whether a number is odd.

x <- 11

if (x%%2 == 1){
  print(paste(x, "is an odd number."))
}
## [1] "11 is an odd number."

if … else statement:

if (condition) {

    cmd1

  } else {

    cmd2

}

Here is an improved version of the previous example.

x <- 11

if (x%%2 == 1){
  print(paste(x, "is an odd number."))
}else{
  print(paste(x, "is not an odd number."))
}
## [1] "11 is an odd number."

When the logical statement is simple and short, it is convenient to use the ifelse() function.

ifelse statement:

ifelse(test, true_value, false_value)

In the following code chunk, we sample 10 values from 1, 2, …, 100 without replacement and check whether each of these 10 values is odd or even.

x <- sample(1:100, 10)
ifelse(x%%2==1, "Odd", "Even")
##  [1] "Even" "Odd"  "Odd"  "Even" "Even" "Odd"  "Odd"  "Even" "Even" "Even"

Creation of Functions

While there are lots of functions available in R packages, there are situations where we may want to define a function ourselves. Using functions also makes our code well-organized and readable.

Function Structure:

function_name <- function(arg1, arg2, ...) {

  function body

}

In the following code chunk, we define a function to check if a value is an even number or odd number.

check_numbers <- function(n){
  if (n%%2 == 1){
    return(paste(n, "is an odd number."))
  }else{
    return(paste(n, "is an even number."))
  }
}

check_numbers(10)
## [1] "10 is an even number."

Question: Can a function call itself?

Yes! Such a function is called a recursive function (a function that calls itself).

All recursive algorithms have the following steps:

  1. Base case (when to stop)

  2. Work toward the base case

  3. Recursive call

In the following code chunk, we write a recursive function that computes \(a^n\) if \(n\) is a nonnegative integer and \(a\) is nonzero.

# Example (recursive power function)

pow <- function(a, n){
  if (n == 0){
    return(1)
  } else {
    return(a*pow(a,n-1))
  }
}

pow(2,10) # find 2^10
## [1] 1024
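A recursive function must be guaranteed to reach its base case; if n were negative, pow() would recurse without stopping. The following defensive variant (pow_safe() is a hypothetical name, not part of the original notes) validates the input first:

pow_safe <- function(a, n){
  stopifnot(n >= 0, n == round(n)) # the base case requires a nonnegative integer n
  if (n == 0){
    return(1)
  } else {
    return(a*pow_safe(a, n-1))
  }
}

pow_safe(2, 10) # same answer as pow(2, 10)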

Here is another example that computes the \(n\)th Fibonacci Number using a recursive function.

# Example (Find the nth Fibonacci Number)

Fib <- function(n){
  if (n == 0 || n == 1){
    return(n)
  } else{
    return(Fib(n-1)+Fib(n-2))
  }
}

Fib(20) # find the 20th Fibonacci number
## [1] 6765

Loops

Loops can be used when repetitive operations are needed, and they help to make the code more readable. However, loops in R can be slow, especially when several loops are nested. Writing pseudocode to plan your algorithm may help to reduce the number of loops needed in the computation.

for() Loops

for() loops are used for general-purpose iteration when we know the number of iterations in advance.

for loop statement:

for (variable in sequence) {

  statements

}

The following code chunk shows how we can find the answer of \(1\times 2 + 2\times 3 + \cdots + 398 \times 399 + 399\times 400\) using a for() loop.

# set up the timer
t0 <- proc.time()
total <- 0
for (i in seq(1,399)){
  total <- total+i * (i+1)
}
total
## [1] 21333200
# print the time used to obtain the answer
proc.time()-t0 
##    user  system elapsed 
##    0.02    0.00    0.02
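As noted earlier, loops in R can be slower than vectorized operations. The same sum can be computed without a loop; this sketch should return the same value as the loop above (21333200):

i <- seq(1, 399)
sum(i * (i + 1)) # vectorized version of the loop above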

while() Loops

A while() loop is recommended when the number of iterations is unknown in advance. For example, if we want to find a numerical solution of \(\sin(x) = x + 1\) with an error of less than 0.0001, we can use a while() loop to find the solution.

while loop statement:

while (condition) {

  statements

}

# Example (Application of Fix point theorem)
# We want to find a solution for sin(x)-1-x=0
# Define the function f(x) = sin(x)-1

find_solution <- function(x) sin(x)-1
precision <- 0.0001
step <- 0 
x0 <- 0.5
error <- 1

# the while loop will be executed as long as the error is higher than the precision and the step count is less than 100

while (error > precision && step < 100){
  xn <- find_solution(x0)
  error <- abs(xn-x0)
  x0 <- xn 
  step <- step + 1
}

if (step == 100){
  print('We cannot find the root within 100 iterations.')
}else{
  print(paste('The root of sinx = x + 1 is about ', x0))
  print(paste('It takes ', step, 'iterations.'))
}
## [1] "The root of sinx = x + 1 is about  -1.93458022062346"
## [1] "It takes  11 iterations."

Advanced Examples

In this section, we show two advanced examples that compute fractals from iterated function systems: a fractal fern and a fractal tree.

The first example draws a fractal fern.

a <- c(0, 0.85, 0.2, -0.15)
b <- c(0, 0.04, -0.26, 0.28)
c <- c(0, -0.04, 0.23, 0.26)
d <- c(0.16, 0.85, 0.22, 0.24)
e <- c(0, 0, 0, 0)
f <- c(0, 1.6, 1.6, 0.44)

numits <- 2000 # number of iterations
x <- 0
y <- 0

par(bg="black") # change the color for background 

plot(seq(-2, 10, by = 0.1), seq(-2, 10, by = 0.1), type = "n", 
     main = "fractal fern")

for (n in seq(1,numits)){
  k <- sample(1:4, size = 1, replace = TRUE, prob = c(0.01, 0.85, 0.07, 0.07))
  newx <- a[k]*x + b[k]*y + e[k]
  newy <- c[k]*x + d[k]*y + f[k]
  x <- newx
  y <- newy
  if (n>10){
    points(x+3,y, col = "green", cex=0.5, pch=20)
  }
}

Here is another example that draws a fractal tree.

# Another Example (Fractal Tree)

a <- c(0, 0.42, 0.42, 0.1)
b <- c(0, -0.42, 0.42, 0)
c <- c(0, 0.42, -0.42, 0)
d <- c(0.5, 0.42, 0.42, 0.1)
e <- c(0, 0, 0, 0)
f <- c(0, 0.2, 0.2, 0.2)

numits <- 5000 
x <- 0
y <- 0

par(bg="black")

plot(seq(-0.3, 0.3, by = 0.1), seq(0, 0.3, by = 0.05), type = "n", 
     main = "fractal fern")

for (n in seq(1,numits)){
  k <- sample(1:4, size = 1, replace = TRUE, prob = c(0.05, 0.4, 0.4, 0.15))
  newx <- a[k]*x + b[k]*y + e[k]
  newy <- c[k]*x+d[k]*y+f[k]
  x <- newx
  y <- newy
  if (n>10){
    points(x,y, col = "green", cex=0.5, pch=20)
  }
}

Some Useful Information

Open Data Sources

Here are some open data sources:

  1. 25+ websites to find datasets for data science projects

  2. 19 Free Public Data Sets for Your Data Science Project

  3. Kaggle Datasets

  4. UCI Machine Learning Repository

  5. Pew Research Center

  6. Cornell University - Data and Statistical Sources: Labor and Employment: Data Sets

Where to get help

  1. To see documentation on any function in R, execute ?data.frame etc.

  2. Google it! (A great way to learn coding!)

  3. Ask questions online, for example:

    stackoverflow.com

    StackExchange

    R-Forum

    reddit.com

    Nabble

    R-Help – Main R Mailing List: Primary help

    RStudio Community

Xie, Yihui, Joseph J Allaire, and Garrett Grolemund. 2018. R Markdown: The Definitive Guide. CRC Press.