Preliminaries
library(agricolae)
Open datasets from ‘agricolae’
data(sweetpotato) # rx is a numeric vector
data(corn)
head(corn)
## method observation rx
## 1 1 83 11.0
## 2 1 91 23.0
## 3 1 94 28.5
## 4 1 89 17.0
## 5 1 89 17.0
## 6 1 96 31.5
head(sweetpotato)
## virus yield
## 1 cc 28.5
## 2 cc 21.7
## 3 cc 23.0
## 4 fc 14.9
## 5 fc 10.6
## 6 fc 13.1
str(corn)
## 'data.frame': 34 obs. of 3 variables:
## $ method : int 1 1 1 1 1 1 1 1 1 2 ...
## $ observation: int 83 91 94 89 89 96 91 92 90 91 ...
## $ rx : num 11 23 28.5 17 17 31.5 23 26 19.5 23 ...
str(sweetpotato)
## 'data.frame': 12 obs. of 2 variables:
## $ virus: Factor w/ 4 levels "cc","fc","ff",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ yield: num 28.5 21.7 23 14.9 10.6 13.1 41.8 39.2 28 38.2 ...
Some basic functions
# Creating our variables
x <-seq(25,65,5)
y <-c(2,6,9,7,3,1)
Example of graphs using agricolae
graph <-graph.freq(x,counts = y,col=colors()[65],xlab=" ", ylab="Frequency",
axes=FALSE)
axis(1,x)
axis(2,0:10)
polygon.freq(graph,col="red",xlab=" ", ylab="")
title(main="Histogram and polygon", xlab="Variable X")
# Imagine that we have other variables to graph
axis(4,0:10)

Experimental designs
4 treatments and 5 blocks
treatments<-c("H","O","P","E")
repetition <- 4
# Generating Randomized Complete Block Design (standard design for agricultural experiments. The field or orchard is divided into units to account for any variation in the field. Treatments are then assigned at random to the subjects in the blocks-once in each block.)
outdesign <-design.rcbd(treatments,
r = repetition,serie=2,
seed = 513,kinds = "Super-Duper") # Arguments -> r = Replications or blocks, serie = number plot, # kinds = method for to randomize
outdesign
## $parameters
## $parameters$design
## [1] "rcbd"
##
## $parameters$trt
## [1] "H" "O" "P" "E"
##
## $parameters$r
## [1] 4
##
## $parameters$serie
## [1] 2
##
## $parameters$seed
## [1] 513
##
## $parameters$kinds
## [1] "Super-Duper"
##
## $parameters[[7]]
## [1] TRUE
##
##
## $sketch
## [,1] [,2] [,3] [,4]
## [1,] "P" "E" "O" "H"
## [2,] "H" "E" "O" "P"
## [3,] "H" "O" "E" "P"
## [4,] "O" "H" "P" "E"
##
## $book
## plots block treatments
## 1 101 1 P
## 2 102 1 E
## 3 103 1 O
## 4 104 1 H
## 5 201 2 H
## 6 202 2 E
## 7 203 2 O
## 8 204 2 P
## 9 301 3 H
## 10 302 3 O
## 11 303 3 E
## 12 304 3 P
## 13 401 4 O
## 14 402 4 H
## 15 403 4 P
## 16 404 4 E
RCBD <- outdesign$book
RCBD
## plots block treatments
## 1 101 1 P
## 2 102 1 E
## 3 103 1 O
## 4 104 1 H
## 5 201 2 H
## 6 202 2 E
## 7 203 2 O
## 8 204 2 P
## 9 301 3 H
## 10 302 3 O
## 11 303 3 E
## 12 304 3 P
## 13 401 4 O
## 14 402 4 H
## 15 403 4 P
## 16 404 4 E
Sketch <- outdesign$sketch
Sketch
## [,1] [,2] [,3] [,4]
## [1,] "P" "E" "O" "H"
## [2,] "H" "E" "O" "P"
## [3,] "H" "O" "E" "P"
## [4,] "O" "H" "P" "E"
# Generating Latin Square Design (The Latin square design is used where the researcher desires to control the variation in an experiment that is related to rows and columns in the field.)
varieties<-c("green","blue","white","red")
outdesign <-design.lsd(varieties,serie=2,seed=540)
LSD <- outdesign$book
print(outdesign$sketch)
## [,1] [,2] [,3] [,4]
## [1,] "red" "blue" "green" "white"
## [2,] "white" "green" "red" "blue"
## [3,] "blue" "red" "white" "green"
## [4,] "green" "white" "blue" "red"
print(LSD) # field book.
## plots row col varieties
## 1 101 1 1 red
## 2 102 1 2 blue
## 3 103 1 3 green
## 4 104 1 4 white
## 5 201 2 1 white
## 6 202 2 2 green
## 7 203 2 3 red
## 8 204 2 4 blue
## 9 301 3 1 blue
## 10 302 3 2 red
## 11 303 3 3 white
## 12 304 3 4 green
## 13 401 4 1 green
## 14 402 4 2 white
## 15 403 4 3 blue
## 16 404 4 4 red
books <-as.numeric(LSD[,1])
print(matrix(books,byrow = TRUE, ncol = 4))
## [,1] [,2] [,3] [,4]
## [1,] 101 102 103 104
## [2,] 201 202 203 204
## [3,] 301 302 303 304
## [4,] 401 402 403 404
# Several design options are accompanied by complementary analysis functions
#Alpha designs
Treatment <-paste("treatment",1:30,sep="") # the number of treatments must be multiple of k (size block)
r<- 2 # replication
k<- 3 # block size
outdesign <-design.alpha(Treatment,k,r,seed=543)
##
## Alpha Design (0,1) - Serie I
##
## Parameters Alpha Design
## =======================
## Treatmeans : 30
## Block size : 3
## Blocks : 10
## Replication: 2
##
## Efficiency factor
## (E ) 0.6170213
##
## <<< Book >>>
Agricolae allows you to conduct a bunch of multiple parametric and non-paprametric comparisons.
Parametric comparisons (e.g. Difference test (HSD.test()), the Duncan test (duncan.test()), the Scheffe test (scheffe.test()), among others)
Non-Parametric comparisons (e.g. kruskal(), waerden.test(), friedman() and durbin.test()).
An important note to avoid possible confusion: R itself provides the functions kruskal.test() and friedman.test() - these report only a single value; whereas the functions provided by agricolae perform multiple comparisons
data(corn)
kruskal.test(corn$observation, corn$method)
##
## Kruskal-Wallis rank sum test
##
## data: corn$observation and corn$method
## Kruskal-Wallis chi-squared = 25.629, df = 3, p-value = 1.141e-05
agri_krusk = kruskal(corn$observation, corn$method, group = TRUE, main = "corn") # Main = title
agri_krusk
## $statistics
## Chisq Df p.chisq
## 25.62884 3 1.140573e-05
##
## $parameters
## test p.ajusted name.t ntr alpha
## Kruskal-Wallis none corn$method 4 0.05
##
## $means
## corn.observation rank std r Min Max Q25 Q50 Q75
## 1 90.55556 21.83333 3.643869 9 83 96 89.00 91.0 92.00
## 2 86.40000 15.30000 3.777124 10 81 91 83.25 86.0 89.75
## 3 95.71429 29.57143 3.638419 7 91 101 93.50 95.0 98.00
## 4 79.87500 4.81250 1.726888 8 77 82 78.75 80.5 81.00
##
## $comparison
## NULL
##
## $groups
## corn$observation groups
## 3 29.57143 a
## 1 21.83333 b
## 2 15.30000 c
## 4 4.81250 d
##
## attr(,"class")
## [1] "group"
agri_krusk = kruskal(corn$observation, corn$method, group = FALSE, main = "corn") # Main = title
agri_krusk
## $statistics
## Chisq Df p.chisq
## 25.62884 3 1.140573e-05
##
## $parameters
## test p.ajusted name.t ntr alpha
## Kruskal-Wallis none corn$method 4 0.05
##
## $means
## corn.observation rank std r Min Max Q25 Q50 Q75
## 1 90.55556 21.83333 3.643869 9 83 96 89.00 91.0 92.00
## 2 86.40000 15.30000 3.777124 10 81 91 83.25 86.0 89.75
## 3 95.71429 29.57143 3.638419 7 91 101 93.50 95.0 98.00
## 4 79.87500 4.81250 1.726888 8 77 82 78.75 80.5 81.00
##
## $comparison
## Difference pvalue Signif. LCL UCL
## 1 - 2 6.533333 0.0071 ** 1.916316 11.150351
## 1 - 3 -7.738095 0.0040 ** -12.802119 -2.674072
## 1 - 4 17.020833 0.0000 *** 12.138086 21.903580
## 2 - 3 -14.271429 0.0000 *** -19.223438 -9.319419
## 2 - 4 10.487500 0.0001 *** 5.721026 15.253974
## 3 - 4 24.758929 0.0000 *** 19.558279 29.959578
##
## $groups
## NULL
##
## attr(,"class")
## [1] "group"
# Same issue with Tukey
data("sweetpotato")
head(sweetpotato)
## virus yield
## 1 cc 28.5
## 2 cc 21.7
## 3 cc 23.0
## 4 fc 14.9
## 5 fc 10.6
## 6 fc 13.1
Potato = aov(yield~virus, data= sweetpotato)
summary(Potato)
## Df Sum Sq Mean Sq F value Pr(>F)
## virus 3 1170.2 390.1 17.34 0.000733 ***
## Residuals 8 179.9 22.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(Potato) # default TukeyHSD test on the anova
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = yield ~ virus, data = sweetpotato)
##
## $virus
## diff lwr upr p adj
## fc-cc -11.5333333 -23.9330031 0.8663365 0.0685547
## ff-cc 11.9333333 -0.4663365 24.3330031 0.0592490
## oo-cc 12.5000000 0.1003302 24.8996698 0.0482107
## ff-fc 23.4666667 11.0669969 35.8663365 0.0013612
## oo-fc 24.0333333 11.6336635 36.4330031 0.0011621
## oo-ff 0.5666667 -11.8330031 12.9663365 0.9987858
agricolaeTukey= HSD.test(Potato, "virus", group = TRUE, main = "Yield of sweet poptato with different virus")
agricolaeTukey
## $statistics
## MSerror Df Mean CV MSD
## 22.48917 8 27.625 17.1666 12.39967
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey virus 4 4.52881 0.05
##
## $means
## yield std r Min Max Q25 Q50 Q75
## cc 24.40000 3.609709 3 21.7 28.5 22.35 23.0 25.75
## fc 12.86667 2.159475 3 10.6 14.9 11.85 13.1 14.00
## ff 36.33333 7.333030 3 28.0 41.8 33.60 39.2 40.50
## oo 36.90000 4.300000 3 32.1 40.4 35.15 38.2 39.30
##
## $comparison
## NULL
##
## $groups
## yield groups
## oo 36.90000 a
## ff 36.33333 ab
## cc 24.40000 bc
## fc 12.86667 c
##
## attr(,"class")
## [1] "group"
agricolaeTukey= HSD.test(Potato, "virus", group = FALSE, main = "Yield of sweet poptato with different virus")
agricolaeTukey
## $statistics
## MSerror Df Mean CV MSD
## 22.48917 8 27.625 17.1666 12.39967
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey virus 4 4.52881 0.05
##
## $means
## yield std r Min Max Q25 Q50 Q75
## cc 24.40000 3.609709 3 21.7 28.5 22.35 23.0 25.75
## fc 12.86667 2.159475 3 10.6 14.9 11.85 13.1 14.00
## ff 36.33333 7.333030 3 28.0 41.8 33.60 39.2 40.50
## oo 36.90000 4.300000 3 32.1 40.4 35.15 38.2 39.30
##
## $comparison
## difference pvalue signif. LCL UCL
## cc - fc 11.5333333 0.0686 . -0.8663365 23.9330031
## cc - ff -11.9333333 0.0592 . -24.3330031 0.4663365
## cc - oo -12.5000000 0.0482 * -24.8996698 -0.1003302
## fc - ff -23.4666667 0.0014 ** -35.8663365 -11.0669969
## fc - oo -24.0333333 0.0012 ** -36.4330031 -11.6336635
## ff - oo -0.5666667 0.9988 -12.9663365 11.8330031
##
## $groups
## NULL
##
## attr(,"class")
## [1] "group"
Graphs for multiple comparisons
data(sweetpotato)
model<-aov(yield~virus,data=sweetpotato)
out <- waller.test(model,"virus", console=TRUE,
main="Yield of sweetpotato with different virus")
##
## Study: Yield of sweetpotato with different virus
##
## Waller-Duncan K-ratio t Test for yield
##
## This test minimizes the Bayes risk under additive loss and certain other assumptions
## ......
## K ratio 100.00000
## Error Degrees of Freedom 8.00000
## Error Mean Square 22.48917
## F value 17.34478
## Critical Value of Waller 2.23600
##
## virus, means
##
## yield std r Min Max
## cc 24.40000 3.609709 3 21.7 28.5
## fc 12.86667 2.159475 3 10.6 14.9
## ff 36.33333 7.333030 3 28.0 41.8
## oo 36.90000 4.300000 3 32.1 40.4
##
## Minimum Significant Difference 8.657906
## Treatments with the same letter are not significantly different.
##
## yield groups
## oo 36.90000 a
## ff 36.33333 a
## cc 24.40000 b
## fc 12.86667 c
bar.err(out$means,variation="SE",horiz=TRUE,xlim=c(0,45),
bar=FALSE,col=0)

bar.err(out$means,variation="range",ylim=c(0,45),bar=FALSE,col="light blue",
main="Range")

Significant interaction exists when the first two component terms of a principal component analysis sum to more than 50%. I provide the function plot.AMMI() to visualize the interaction. The parameter type specifies if a biplot or triplot is created.
v1 <- c(10.2,8.8,8.8,9.3,9.6,7.2,8.4,9.6,7.9,5.8)
v2 <- c(7.8,7.0,6.9,7,8.3,7.4,6.5,6.8,7.9,9.8)
v3 <- c(5.3,4.4,5.3,4.4,5.5,4.6,6.2,6.0,6.5,5.3)
v4 <- c(7.8,5.9,7.3,5.9,7.8,6.3,7.9,7.5,7.6,5.4)
v5 <-c(9,9.2,8.8,10.6,8.3,9.3,9.6,8.8,7.9,9.1)
study <- data.frame(v1, v2, v3, v4, v5)
rownames(study) <- LETTERS[1:10]
study # We assigned letters to each component of each vector
## v1 v2 v3 v4 v5
## A 10.2 7.8 5.3 7.8 9.0
## B 8.8 7.0 4.4 5.9 9.2
## C 8.8 6.9 5.3 7.3 8.8
## D 9.3 7.0 4.4 5.9 10.6
## E 9.6 8.3 5.5 7.8 8.3
## F 7.2 7.4 4.6 6.3 9.3
## G 8.4 6.5 6.2 7.9 9.6
## H 9.6 6.8 6.0 7.5 8.8
## I 7.9 7.9 6.5 7.6 7.9
## J 5.8 9.8 5.3 5.4 9.1
output <- stability.par(study, rep=4, MSerror=2)# This procedure calculates the stability variations as well as the statistics of selection for the yield and the stability. The averages of the genotype through the different environment repetitions are required for the calculations. The mean square error must be calculated from the joint variance analysis.
output
## $analysis
## Df Sum Sq Mean Sq F value Pr(>F)
## Total 49 507.22
## Genotypes 9 24.212 2.6902 0.68 0.725
## Environments 4 339.776 84.944 42.47 <0.001
## Interaction 36 143.232 3.9787 1.99 0.003
## Heterogeneity 9 38.5532 4.2837 1.1 0.392
## Residual 27 104.6788 3.877 1.94 0.0074
## Pooled Error 135 2
##
## $statistics
## Mean Sigma-square . s-square . Ecovalence
## A 8.02 2.125667 ns 2.677230 ns 8.3936
## B 7.06 1.288167 ns 0.065311 ns 5.7136
## C 7.42 0.305667 ns 0.571269 ns 2.5696
## D 7.44 5.980667 * 1.580475 ns 20.7296
## E 7.90 2.200667 ns 2.975449 ns 8.6336
## F 6.96 1.370667 ns 1.856986 ns 5.9776
## G 7.72 2.975667 ns 3.563961 ns 11.1136
## H 7.74 2.153167 ns 2.840653 ns 8.4816
## I 7.56 4.058167 ns 0.008218 ns 14.5776
## J 7.08 17.328167 ** 22.630361 ** 57.0416
##
## $stability
## Yield Rank Adj.rank Adjusted Stab.var Stab.rating YSi ...
## A 8.02 10 1 11 2.125667 0 11 +
## B 7.06 2 -1 1 1.288167 0 1
## C 7.42 4 -1 3 0.305667 0 3
## D 7.44 5 -1 4 5.980667 -4 0
## E 7.90 9 1 10 2.200667 0 10 +
## F 6.96 1 -1 0 1.370667 0 0
## G 7.72 7 1 8 2.975667 0 8 +
## H 7.74 8 1 9 2.153167 0 9 +
## I 7.56 6 1 7 4.058167 -2 5 +
## J 7.08 3 -1 2 17.328167 -8 -6
rdto <- c(study[,1], study[,2], study[,3], study[,4], study[,5])
environment <- gl(5,10)
genotype <- rep(rownames(study),5)
model<-AMMI(ENV=environment, GEN=genotype, REP=4, Y=rdto, MSE=2,
console=TRUE) # Additive Main Effects and Multiplicative Interaction Models (AMMI) are widely used to analyze main effects and genotype by environment (GEN, ENV) interactions in multilocation variety trials. Furthermore, this function generates data to biplot, triplot graphs and analysis.
##
## ANALYSIS AMMI: rdto
## Class level information
##
## ENV: 1 2 3 4 5
## GEN: A B C D E F G H I J
## REP: 4
##
## Number of means: 50
##
## Dependent Variable: rdto
##
## Analysis of variance
## Df Sum Sq Mean Sq F value Pr(>F)
## ENV 4 339.776 84.944000
## REP(ENV) 15
## GEN 9 24.212 2.690222 1.345111 0.219578952
## ENV:GEN 36 143.232 3.978667 1.989333 0.002581175
## Residuals 135 270.000 2.000000
##
## Coeff var Mean rdto
## 18.88136 7.49
##
## Analysis
## percent acum Df Sum.Sq Mean.Sq F.value Pr.F
## PC1 55.5 55.5 12 79.530655 6.627555 3.31 0.0003
## PC2 30.4 85.9 10 43.475820 4.347582 2.17 0.0232
## PC3 12.5 98.4 8 17.878412 2.234801 1.12 0.3537
## PC4 1.6 100.0 6 2.347113 0.391186 0.20 0.9763
plot(model,type=1,las=1)

plot(model,type=2,las=1)

# Another related visualization is the addition of a contour line which adds a contour line proportional to the longest distance of a genotype and has a values between 0 and 1
data(sinRepAmmi)
Environment <- sinRepAmmi$ENV
Genotype <- sinRepAmmi$GEN
Yield <- sinRepAmmi$YLD
REP <- 3
MSerror <- 85.5
model<-AMMI(Environment, Genotype, REP, Yield, MSerror)
plot(model)
AMMI.contour(model,distance=0.80,shape=8,col="red",lwd=2,lty=5)

##
## Limit, radio: 2.632673
## Genotype in: 46
## Genotype out: 4
## $`GENOTYPE IN`
## [1] "1" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "2" "21" "22"
## [15] "23" "24" "25" "26" "27" "28" "29" "31" "32" "33" "34" "35" "36" "37"
## [29] "38" "39" "4" "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" "5"
## [43] "50" "6" "7" "8"
##
## $`GENOTYPE OUT`
## [1] "20" "3" "30" "9"
##
## $Distance
## distance
## 1 1.9514760
## 10 1.5615217
## 11 1.1769189
## 12 1.5375472
## 13 1.9695047
## 14 2.4885468
## 15 0.8082461
## 16 1.2355245
## 17 0.9642868
## 18 1.7277735
## 19 1.2444535
## 2 2.5864575
## 20 3.0107101
## 21 1.3768422
## 22 1.8787002
## 23 1.6592957
## 24 1.1502888
## 25 0.9657643
## 26 1.3664762
## 27 0.9072038
## 28 2.2600489
## 29 1.9305338
## 3 3.2908418
## 30 3.2759033
## 31 0.5189824
## 32 2.0794344
## 33 0.3602233
## 34 2.3231291
## 35 1.0042183
## 36 0.2286455
## 37 0.5177541
## 38 1.0371016
## 39 0.7436472
## 4 0.7355002
## 40 0.7130276
## 41 1.4534899
## 42 0.5117842
## 43 1.3041322
## 44 0.6452761
## 45 1.2233650
## 46 1.8981620
## 47 1.5444075
## 48 0.7955642
## 49 2.1712610
## 5 2.5288390
## 50 0.8002928
## 6 1.4282289
## 7 1.0677117
## 8 2.3695614
## 9 3.1242502
References
De Mendiburu, F., & Simon, R. (2015). Agricolae-Ten years of an open source statistical tool for experiments in breeding, agriculture and biology. PeerJ PrePrints.