Notes file.choose() is a very handy command which saves the work associated with defining absolute and relative paths which can be quite cumbersome. list.files for non-interactive selection. choose.files for selecting multiple files interactively. More www.rdocumentation.org/packages/base/versions/3.5.2/topics/file.choose
x <- seq(1, 20, 0.5) # sequence from 1 to 20 by 0.5
x # print it
w <- 1 + x/2 # a weight vector
y <- x + w*rnorm(x) # generate y with standard deviations
# equal to w
dum <- data.frame( x, y, w ) # make a data frame of 3 columns
dum # typing it's name shows the object
rm( x, y, w ) # remove the original variables
fm <- lm( y ~ x, data = dum) # fit a simple linear regression (between x and y)
summary( fm ) # and look at the analysis
fm1 <- lm( y ~ x, data = dum, weight = 1/w^2) # a weighted regression
summary(fm1)
lrf <- loess( y ~ x, dum) # a smooth regression curve using a
# modern local regression function
attach( dum ) # make the columns in the data frame visible
# as variables (Note: before this command, typing x and
# y would yield errors)
plot( x, y) # a standard scatterplot to which we will
# fit regression curves
lines(spline( x, fitted(lrf)), col = 2) # add in the local regression using spline
# interpolation between calculated
# points
abline(0, 1, lty = 3, col = 3) # add in the true regression line (lty=3: read line type=dotted;
# col=3: read colour=green; type "?par" for details)
abline(fm, col = 4) # add in the unweighted regression line
abline(fm1, lty = 4, col = 5) # add in the weighted regression line
plot(fitted(fm), resid(fm),
xlab = "Fitted Values",
ylab = "Residuals") # a standard regression diagnostic plot
# to check for heteroscedasticity.
# The data were generated from a
# heteroscedastic process. Can you see
# it from this plot?
qqnorm(resid(fm)) # Normal scores to check for skewness,
# kurtosis and outliers. How would you
# expect heteroscedasticity to show up?
search() # Have a look at the search path
detach() # Remove the data frame from the search path
search() # Have another look. How did it change?
rm(fm, fm1, lrf, dum) # Clean up (this is not that important in R
# for reasons we will understand later)
library(rgl)
rgl.open()
rgl.bg(col='white')
x <- sort(rnorm(1000))
y <- rnorm(1000)
z1 <- atan2(x,y)
z <- rnorm(1000) + atan2(x, y)
plot3d(x, y, z1, col = rainbow(1000), size = 2)
bbox3d()
# http://wiki.math.yorku.ca/index.php/VR4:_Chapter_1_summary
x <- seq( 1, 20, 0.5) # sequence from 1 to 20 by 0.5
x # print it
w <- 1 + x/2 # a weight vector
y <- x + w*rnorm(x) # generate y with standard deviations
# equal to w
dum <- data.frame( x, y, w ) # make a data frame of 3 columns
dum # typing it's name shows the object
rm( x, y, w ) # remove the original variables
fm <- lm ( y ~ x, data = dum) # fit a simple linear regression (between x and y)
summary( fm ) # and look at the analysis
fm1 <- lm( y ~ x, data = dum, weight = 1/w^2 ) # a weighted regression
summary(fm1)
lrf <- loess( y ~ x, dum) # a smooth regression curve using a
# modern local regression function
attach( dum ) # make the columns in the data frame visible
# as variables (Note: before this command, typing x and
# y would yield errors)
plot( x, y) # a standard scatterplot to which we will
# fit regression curves
lines( spline( x, fitted (lrf)), col = 2) # add in the local regression using spline
# interpolation between calculated
# points
abline(0, 1, lty = 3, col = 3) # add in the true regression line (lty=3: read line type=dotted;
# col=3: read colour=green; type "?par" for details)
abline(fm, col = 4) # add in the unweighted regression line
abline(fm1, lty = 4, col = 5) # add in the weighted regression line
plot( fitted(fm), resid(fm),
xlab = "Fitted Values",
ylab = "Residuals") # a standard regression diagnostic plot
# to check for heteroscedasticity.
# The data were generated from a
# heteroscedastic process. Can you see
# it from this plot?
qqnorm( resid(fm) ) # Normal scores to check for skewness,
# kurtosis and outliers. How would you
# expect heteroscedasticity to show up?
search() # Have a look at the search path
detach() # Remove the data frame from the search path
search() # Have another look. How did it change?
rm(fm, fm1, lrf, dum) # Clean up (this is not that important in R
# for reasons we will understand later)
library(rgl)
rgl.open()
rgl.bg(col='white')
x <- sort(rnorm(1000))
y <- rnorm(1000)
z1 <- atan2(x,y)
z <- rnorm(1000) + atan2(x, y)
plot3d(x, y, z1, col = rainbow(1000), size = 2)
bbox3d()
# http://wiki.math.yorku.ca/index.php/VR4:_Chapter_1_summary
x <- seq( 1, 20, 0.5) # sequence from 1 to 20 by 0.5
x # print it
w <- 1 + x/2 # a weight vector
y <- x + w*rnorm(x) # generate y with standard deviations
# equal to w
dum <- data.frame( x, y, w ) # make a data frame of 3 columns
dum # typing it's name shows the object
rm( x, y, w ) # remove the original variables
fm <- lm ( y ~ x, data = dum) # fit a simple linear regression (between x and y)
summary( fm ) # and look at the analysis
fm1 <- lm( y ~ x, data = dum, weight = 1/w^2 ) # a weighted regression
summary(fm1)
lrf <- loess( y ~ x, dum) # a smooth regression curve using a
# modern local regression function
attach( dum ) # make the columns in the data frame visible
# as variables (Note: before this command, typing x and
# y would yield errors)
plot( x, y) # a standard scatterplot to which we will
# fit regression curves
lines( spline( x, fitted (lrf)), col = 2) # add in the local regression using spline
# interpolation between calculated
# points
abline(0, 1, lty = 3, col = 3) # add in the true regression line (lty=3: read line type=dotted;
# col=3: read colour=green; type "?par" for details)
abline(fm, col = 4) # add in the unweighted regression line
abline(fm1, lty = 4, col = 5) # add in the weighted regression line
plot( fitted(fm), resid(fm),
xlab = "Fitted Values",
ylab = "Residuals") # a standard regression diagnostic plot
# to check for heteroscedasticity.
# The data were generated from a
# heteroscedastic process. Can you see
# it from this plot?
qqnorm( resid(fm) ) # Normal scores to check for skewness,
# kurtosis and outliers. How would you
# expect heteroscedasticity to show up?
search() # Have a look at the search path
detach() # Remove the data frame from the search path
search() # Have another look. How did it change?
rm(fm, fm1, lrf, dum) # Clean up (this is not that important in R
# for reasons we will understand later)
library(rgl)
rgl.open()
rgl.bg(col='white')
x <- sort(rnorm(1000))
y <- rnorm(1000)
z1 <- atan2(x,y)
z <- rnorm(1000) + atan2(x, y)
plot3d(x, y, z1, col = rainbow(1000), size = 2)
bbox3d()
skew(x, na.rm = TRUE)
kurtosi(x, na.rm = TRUE)
#x A data.frame or matrix
#na.rm how to treat missing data
## The function is currently defined as
function (x, na.rm = TRUE)
{
if (length(dim(x)) == 0) {
if (na.rm) {
x <- x[!is.na(x)]
}
mx <- mean(x)
sdx <- sd(x,na.rm=na.rm)
kurt <- sum((x - mx)^4)/(length(x) * sd(x)^4) -3
} else {
kurt <- rep(NA,dim(x)[2])
mx <- colMeans(x,na.rm=na.rm)
sdx <- sd(x,na.rm=na.rm)
for (i in 1:dim(x)[2]) {
kurt[i] <- sum((x[,i] - mx[i])^4, na.rm = na.rm)/((length(x[,i]) - sum(is.na(x[,i]))) * sdx[i]^4) -3
}
}
return(kurt)
}
skew(x, na.rm = TRUE)
kurtosi(x, na.rm = TRUE)
#x A data.frame or matrix
#na.rm how to treat missing data
## The function is currently defined as
function (x, na.rm = TRUE)
{
if (length(dim(x)) == 0) {
if (na.rm) {
x <- x[!is.na(x)]
}
mx <- mean(x)
sdx <- sd(x,na.rm=na.rm)
kurt <- sum((x - mx)^4)/(length(x) * sd(x)^4) -3
} else {
kurt <- rep(NA,dim(x)[2])
mx <- colMeans(x,na.rm=na.rm)
sdx <- sd(x,na.rm=na.rm)
for (i in 1:dim(x)[2]) {
kurt[i] <- sum((x[,i] - mx[i])^4, na.rm = na.rm)/((length(x[,i]) - sum(is.na(x[,i]))) * sdx[i]^4) -3
}
}
return(kurt)
}
ttestBF function (one sample) BFmcmc functionttestBF function (two sample)Bayesian meta analysisBayes factor robustness analysis, one-sidedAnimating Robustness-Check of Bayes Factor PriorsSkewness & KurtosisanovaBF function
## Bem's t statistics from four selected experiments
t = c(-.15, 2.39, 2.42, 2.43)
N = c(100, 150, 97, 99)
###
## Do analysis again, without nullInterval restriction
bf = meta.ttestBF(t=t, n1=N, rscale=1)
## Obtain posterior samples
chains = posterior(bf, iterations = 10000)
plot(chains)
#https://richarddmorey.github.io/BayesFactor/
## Bem's t statistics from four selected experiments
t = c(-.15, 2.39, 2.42, 2.43)
N = c(100, 150, 97, 99)
###
## Do analysis again, without nullInterval restriction
bf = meta.ttestBF(t=t, n1=N, rscale=1)
## Obtain posterior samples
chains = posterior(bf, iterations = 10000)
plot(chains)
#https://richarddmorey.github.io/BayesFactor/
## Bem's t statistics from four selected experiments
t = c(-.15, 2.39, 2.42, 2.43)
N = c(100, 150, 97, 99)
###
## Do analysis again, without nullInterval restriction
bf = meta.ttestBF(t=t, n1=N, rscale=1)
## Obtain posterior samples
chains = posterior(bf, iterations = 10000)
plot(chains)
Comment
ttestBF function, which performs the “JZS” t test described by Rouder, Speckman, Sun, Morey, and Iverson (2009).
## This source code is licensed under the FreeBSD license
## (c) 2013 Felix Schönbrodt
#' @title Plots a comparison of a sequence of priors for t test Bayes factors
#'
#' @details
#'
#'
#' @param ts A vector of t values
#' @param ns A vector of corresponding sample sizes
#' @param rs The sequence of rs that should be tested. r should run up to 2 (higher values are implausible; E.-J. Wagenmakers, personal communication, Aug 22, 2013)
#' @param labels Names for the studies (displayed in the facet headings)
#' @param dots Values of r's which should be marked with a red dot
#' @param plot If TRUE, a ggplot is returned. If false, a data frame with the computed Bayes factors is returned
#' @param sides If set to "two" (default), a two-sided Bayes factor is computed. If set to "one", a one-sided Bayes factor is computed. In this case, it is assumed that positive t values correspond to results in the predicted direction and negative t values to results in the unpredicted direction. For details, see Wagenmakers, E. J., & Morey, R. D. (2013). Simple relation between one-sided and two-sided Bayesian point-null hypothesis tests.
#' @param nrow Number of rows of the faceted plot.
#' @param forH1 Defines the direction of the BF. If forH1 is TRUE, BF > 1 speak in favor of H1 (i.e., the quotient is defined as H1/H0). If forH1 is FALSE, it's the reverse direction.
#'
#' @references
#'
#' Rouder, J. N., Speckman, P. L., Sun, D., Morey, R. D., & Iverson, G. (2009). Bayesian t-tests for accepting and rejecting the null hypothesis. Psychonomic Bulletin and Review, 16, 225-237.
#' Wagenmakers, E.-J., & Morey, R. D. (2013). Simple relation between one-sided and two-sided Bayesian point-null hypothesis tests. Manuscript submitted for publication
#' Wagenmakers, E.-J., Wetzels, R., Borsboom, D., Kievit, R. & van der Maas, H. L. J. (2011). Yes, psychologists must change the way they analyze their data: Clarifications for Bem, Utts, & Johnson (2011)
## This source code is licensed under the FreeBSD license
## (c) 2013 Felix Schönbrodt
#' @title Plots a comparison of a sequence of priors for t test Bayes factors
#'
#' @details
#'
#'
#' @param ts A vector of t values
#' @param ns A vector of corresponding sample sizes
#' @param rs The sequence of rs that should be tested. r should run up to 2 (higher values are implausible; E.-J. Wagenmakers, personal communication, Aug 22, 2013)
#' @param labels Names for the studies (displayed in the facet headings)
#' @param dots Values of r's which should be marked with a red dot
#' @param plot If TRUE, a ggplot is returned. If false, a data frame with the computed Bayes factors is returned
#' @param sides If set to "two" (default), a two-sided Bayes factor is computed. If set to "one", a one-sided Bayes factor is computed. In this case, it is assumed that positive t values correspond to results in the predicted direction and negative t values to results in the unpredicted direction. For details, see Wagenmakers, E. J., & Morey, R. D. (2013). Simple relation between one-sided and two-sided Bayesian point-null hypothesis tests.
#' @param nrow Number of rows of the faceted plot.
#' @param forH1 Defines the direction of the BF. If forH1 is TRUE, BF > 1 speak in favor of H1 (i.e., the quotient is defined as H1/H0). If forH1 is FALSE, it's the reverse direction.
#'
#' @references
#'
#' Rouder, J. N., Speckman, P. L., Sun, D., Morey, R. D., & Iverson, G. (2009). Bayesian t-tests for accepting and rejecting the null hypothesis. Psychonomic Bulletin and Review, 16, 225-237.
#' Wagenmakers, E.-J., & Morey, R. D. (2013). Simple relation between one-sided and two-sided Bayesian point-null hypothesis tests. Manuscript submitted for publication
#' Wagenmakers, E.-J., Wetzels, R., Borsboom, D., Kievit, R. & van der Maas, H. L. J. (2011). Yes, psychologists must change the way they analyze their data: Clarifications for Bem, Utts, & Johnson (2011)
BFrobustplot <- function(
ts, ns, rs=seq(0, 2, length.out=200), dots=1, plot=TRUE,
labels=c(), sides="two", nrow=2, xticks=3, forH1=TRUE)
{
library(BayesFactor)
# compute one-sided p-values from ts and ns
ps <- pt(ts, df=ns-1, lower.tail = FALSE) # one-sided test
# add the dots location to the sequences of r's
rs <- c(rs, dots)
res <- data.frame()
for (r in rs) {
# first: calculate two-sided BF
B_e0 <- c()
for (i in 1:length(ts))
B_e0 <- c(B_e0, exp(ttest.tstat(t = ts[i], n1 = ns[i], rscale=r)You can't use 'macro parameter character #' in math modeBF <- res[, paste0("BF_", sides)]
# Flip BF if requested
if (forH1 == FALSE) {
resBF
}
if (plot==TRUE) {
library(ggplot2)
p1 <- ggplot(res, aes(x=r, y=log(BF))) + geom_line() + facet_wrap(~heading, nrow=nrow) + theme_bw() + ylab("log(BF)")
p1 <- p1 + geom_hline(yintercept=c(c(-log(c(30, 10, 3)), log(c(3, 10, 30)))), linetype="dotted", color="darkgrey")
p1 <- p1 + geom_hline(yintercept=log(1), linetype="dashed", color="darkgreen")
# add the dots
p1 <- p1 + geom_point(data=res[res$r %in% dots,], aes(x=r, y=log(BF)), color="red", size=2)
# add annotation
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=-2.85, label=paste0("Strong~H[", ifelse(forH1==TRUE,0,1), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=-1.7 , label=paste0("Moderate~H[", ifelse(forH1==TRUE,0,1), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=-.55 , label=paste0("Anectodal~H[", ifelse(forH1==TRUE,0,1), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=2.86 , label=paste0("Strong~H[", ifelse(forH1==TRUE,1,0), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=1.7 , label=paste0("Moderate~H[", ifelse(forH1==TRUE,1,0), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=.55 , label=paste0("Anectodal~H[", ifelse(forH1==TRUE,1,0), "]"), hjust=1, vjust=.5, vjust=.5, size=3, color="black", parse=TRUE)
# set scale ticks
p1 <- p1 + scale_y_continuous(breaks=c(c(-log(c(30, 10, 3)), 0, log(c(3, 10, 30)))), labels=c("-log(30)", "-log(10)", "-log(3)", "log(1)", "log(3)", "log(10)", "log(30)"))
p1 <- p1 + scale_x_continuous(breaks=seq(min(rs), max(rs), length.out=xticks))
return(p1)
} else {
return(res)
}
}
## This source code is licensed under the FreeBSD license
## (c) 2013 Felix Schönbrodt
#' @title Plots a comparison of a sequence of priors for t test Bayes factors
#'
#' @details
#'
#'
#' @param ts A vector of t values
#' @param ns A vector of corresponding sample sizes
#' @param rs The sequence of rs that should be tested. r should run up to 2 (higher values are implausible; E.-J. Wagenmakers, personal communication, Aug 22, 2013)
#' @param labels Names for the studies (displayed in the facet headings)
#' @param dots Values of r's which should be marked with a red dot
#' @param plot If TRUE, a ggplot is returned. If false, a data frame with the computed Bayes factors is returned
#' @param sides If set to "two" (default), a two-sided Bayes factor is computed. If set to "one", a one-sided Bayes factor is computed. In this case, it is assumed that positive t values correspond to results in the predicted direction and negative t values to results in the unpredicted direction. For details, see Wagenmakers, E. J., & Morey, R. D. (2013). Simple relation between one-sided and two-sided Bayesian point-null hypothesis tests.
#' @param nrow Number of rows of the faceted plot.
#' @param forH1 Defines the direction of the BF. If forH1 is TRUE, BF > 1 speak in favor of H1 (i.e., the quotient is defined as H1/H0). If forH1 is FALSE, it's the reverse direction.
#'
#' @references
#'
#' Rouder, J. N., Speckman, P. L., Sun, D., Morey, R. D., & Iverson, G. (2009). Bayesian t-tests for accepting and rejecting the null hypothesis. Psychonomic Bulletin and Review, 16, 225-237.
#' Wagenmakers, E.-J., & Morey, R. D. (2013). Simple relation between one-sided and two-sided Bayesian point-null hypothesis tests. Manuscript submitted for publication
#' Wagenmakers, E.-J., Wetzels, R., Borsboom, D., Kievit, R. & van der Maas, H. L. J. (2011). Yes, psychologists must change the way they analyze their data: Clarifications for Bem, Utts, & Johnson (2011)
BFrobustplot <- function(
ts, ns, rs=seq(0, 2, length.out=200), dots=1, plot=TRUE,
labels=c(), sides="two", nrow=2, xticks=3, forH1=TRUE)
{
library(BayesFactor)
# compute one-sided p-values from ts and ns
ps <- pt(ts, df=ns-1, lower.tail = FALSE) # one-sided test
# add the dots location to the sequences of r's
rs <- c(rs, dots)
res <- data.frame()
for (r in rs) {
# first: calculate two-sided BF
B_e0 <- c()
for (i in 1:length(ts))
B_e0 <- c(B_e0, exp(ttest.tstat(t = ts[i], n1 = ns[i], rscale=r)$bf))
# second: calculate one-sided BF
B_r0 <- c() for (i in 1:length(ts)) { if (ts[i] > 0) {
# correct direction
B_r0 <- c(B_r0, (2 - 2*ps[i])*B_e0[i])
} else {
# wrong direction
B_r0 <- c(B_r0, (1 - ps[i])*2*B_e0[i])
}
}
res0 <- data.frame(t=ts, n=ns, BF_two=B_e0, BF_one=B_r0, r=r) if (length(labels) > 0) {
res0$labels <- labels
res0$heading <- factor(1:length(labels), labels=paste0(labels, "\n(t = ", ts, ", df = ", ns-1, ")"), ordered=TRUE)
} else {
res0$heading <- factor(1:length(ts), labels=paste0("t = ", ts, ", df = ", ns-1), ordered=TRUE)
}
res <- rbind(res, res0)
}
# define the measure to be plotted: one- or two-sided?
res$BF <- res[, paste0("BF_", sides)]
# Flip BF if requested
if (forH1 == FALSE) {
res$BF <- 1/res$BF
}
if (plot==TRUE) {
library(ggplot2)
p1 <- ggplot(res, aes(x=r, y=log(BF))) + geom_line() + facet_wrap(~heading, nrow=nrow) + theme_bw() + ylab("log(BF)")
p1 <- p1 + geom_hline(yintercept=c(c(-log(c(30, 10, 3)), log(c(3, 10, 30)))), linetype="dotted", color="darkgrey")
p1 <- p1 + geom_hline(yintercept=log(1), linetype="dashed", color="darkgreen")
# add the dots
p1 <- p1 + geom_point(data=res[res$r %in% dots,], aes(x=r, y=log(BF)), color="red", size=2)
# add annotation
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=-2.85, label=paste0("Strong~H[", ifelse(forH1==TRUE,0,1), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=-1.7 , label=paste0("Moderate~H[", ifelse(forH1==TRUE,0,1), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=-.55 , label=paste0("Anectodal~H[", ifelse(forH1==TRUE,0,1), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=2.86 , label=paste0("Strong~H[", ifelse(forH1==TRUE,1,0), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=1.7 , label=paste0("Moderate~H[", ifelse(forH1==TRUE,1,0), "]"), hjust=1, vjust=.5, size=3, color="black", parse=TRUE)
p1 <- p1 + annotate("text", x=max(rs)*1.8, y=.55 , label=paste0("Anectodal~H[", ifelse(forH1==TRUE,1,0), "]"), hjust=1, vjust=.5, vjust=.5, size=3, color="black", parse=TRUE)
# set scale ticks
p1 <- p1 + scale_y_continuous(breaks=c(c(-log(c(30, 10, 3)), 0, log(c(3, 10, 30)))), labels=c("-log(30)", "-log(10)", "-log(3)", "log(1)", "log(3)", "log(10)", "log(30)"))
p1 <- p1 + scale_x_continuous(breaks=seq(min(rs), max(rs), length.out=xticks))
return(p1)
} else {
return(res)
}
}
References
Rouder, J. N., Speckman, P. L., Sun, D., Morey, R. D., & Iverson, G. (2009). Bayesian t-tests for accepting and rejecting the null hypothesis. Psychonomic Bulletin and Review, 16, 225-237
#https://jimgrange.wordpress.com/2015/11/27/animating-robustness-check-of-bayes-factor-priors/
### plots of prior robustness check
# gif created from PNG plots using http://gifmaker.me/
# clear R's memory
rm(list = ls())
# load the Bayes factor package
library(BayesFactor)
# set working directory (will be used to save files here, so make sure
# this is where you want to save your plots!)
setwd <- "D:/Work//Blog_YouTube code//Blog/Prior Robust Visualisation/plots"
#------------------------------------------------------------------------------
### declare some variables for the analysis
# what is the t-value for the data?
tVal <- 3.098
# how many points in the prior should be explored?
nPoints <- 1000
# what Cauchy rates should be explored?
cauchyRates <- seq(from = 0.01, to = 1.5, length.out = nPoints)
# what effect sizes should be plotted?
effSize <- seq(from = -2, to = 2, length.out = nPoints)
# get the Bayes factor for each prior value
bayesFactors <- sapply(cauchyRates, function(x)
exp(ttest.tstat(t = tVal, n1 = 76, rscale = x)[['bf']]))
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
### do the plotting
# how many plots do we want to produce?
nPlots <- 50
plotWidth <- round(seq(from = 1, to = nPoints, length.out = nPlots), 0)
# loop over each plot
for(i in plotWidth){
# set up the file
currFile <- paste(getwd(), "/plot_", i, ".png", sep = "")
# initiate the png file
png(file = currFile, width = 1200, height = 1000, res = 200)
# change the plotting window so plots appear side-by-side
par(mfrow = c(1, 2))
#----
# do the prior density plot
d <- dcauchy(effSize, scale = cauchyRates[i])
plot(effSize, d, type = "l", ylim = c(0, 5), xlim = c(-2, 2),
ylab = "Density", xlab = "Effect Size (d)", lwd = 2, col = "gray48",
main = paste("Rate (r) = ", round(cauchyRates[i], 3), sep = ""))
#-----
# do the Bayes factor plot
plot(cauchyRates, bayesFactors, type = "l", lwd = 2, col = "gray48",
ylim = c(0, max(bayesFactors)), xaxt = "n",
xlab = "Cauchy Prior Width (r)", ylab = "Bayes Factor (10)")
abline(h = 0, lwd = 1)
abline(h = 6, col = "black", lty = 2, lwd = 2)
axis(1, at = seq(0, 1.5, 0.25))
# add the BF at the default Cauchy point
points(0.707, 9.97, col = "black", cex = 1.5, pch = 21, bg = "red")
# add the BF for the Cauchy prior currently being plotted
points(cauchyRates[i], bayesFactors[i], col = "black", pch = 21, cex = 1.3,
bg = "cyan")
# add legend
legend(x = 0.25, y = 3, legend = c("r = 0.707", paste("r = ",
round(cauchyRates[i], 3),
sep = "")),
pch = c(21, 21), lty = c(NA, NA), lwd = c(NA, NA), pt.cex = c(1, 1),
col = c("black", "black"), pt.bg = c("red", "cyan"), bty = "n")
# save the current plot
dev.off()
}
#------------------------------------------------------------------------------
#https://jimgrange.wordpress.com/2015/11/27/animating-robustness-check-of-bayes-factor-priors/
### plots of prior robustness check
# gif created from PNG plots using http://gifmaker.me/
# clear R's memory
rm(list = ls())
# load the Bayes factor package
library(BayesFactor)
# set working directory (will be used to save files here, so make sure
# this is where you want to save your plots!)
setwd <- "D:/Work//Blog_YouTube code//Blog/Prior Robust Visualisation/plots"
#------------------------------------------------------------------------------
### declare some variables for the analysis
# what is the t-value for the data?
tVal <- 3.098
# how many points in the prior should be explored?
nPoints <- 1000
# what Cauchy rates should be explored?
cauchyRates <- seq(from = 0.01, to = 1.5, length.out = nPoints)
# what effect sizes should be plotted?
effSize <- seq(from = -2, to = 2, length.out = nPoints)
# get the Bayes factor for each prior value
bayesFactors <- sapply(cauchyRates, function(x)
exp(ttest.tstat(t = tVal, n1 = 76, rscale = x)[['bf']]))
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
### do the plotting
# how many plots do we want to produce?
nPlots <- 50
plotWidth <- round(seq(from = 1, to = nPoints, length.out = nPlots), 0)
# loop over each plot
for(i in plotWidth){
# set up the file
currFile <- paste(getwd(), "/plot_", i, ".png", sep = "")
# initiate the png file
png(file = currFile, width = 1200, height = 1000, res = 200)
# change the plotting window so plots appear side-by-side
par(mfrow = c(1, 2))
#----
# do the prior density plot
d <- dcauchy(effSize, scale = cauchyRates[i])
plot(effSize, d, type = "l", ylim = c(0, 5), xlim = c(-2, 2),
ylab = "Density", xlab = "Effect Size (d)", lwd = 2, col = "gray48",
main = paste("Rate (r) = ", round(cauchyRates[i], 3), sep = ""))
#-----
# do the Bayes factor plot
plot(cauchyRates, bayesFactors, type = "l", lwd = 2, col = "gray48",
ylim = c(0, max(bayesFactors)), xaxt = "n",
xlab = "Cauchy Prior Width (r)", ylab = "Bayes Factor (10)")
abline(h = 0, lwd = 1)
abline(h = 6, col = "black", lty = 2, lwd = 2)
axis(1, at = seq(0, 1.5, 0.25))
# add the BF at the default Cauchy point
points(0.707, 9.97, col = "black", cex = 1.5, pch = 21, bg = "red")
# add the BF for the Cauchy prior currently being plotted
points(cauchyRates[i], bayesFactors[i], col = "black", pch = 21, cex = 1.3,
bg = "cyan")
# add legend
legend(x = 0.25, y = 3, legend = c("r = 0.707", paste("r = ",
round(cauchyRates[i], 3),
sep = "")),
pch = c(21, 21), lty = c(NA, NA), lwd = c(NA, NA), pt.cex = c(1, 1),
col = c("black", "black"), pt.bg = c("red", "cyan"), bty = "n")
# save the current plot
dev.off()
}
#------------------------------------------------------------------------------
skew(x, na.rm = TRUE)
kurtosi(x, na.rm = TRUE)
#x A data.frame or matrix
#na.rm how to treat missing data
## The function is currently defined as
function (x, na.rm = TRUE)
{
if (length(dim(x)) == 0) {
if (na.rm) {
x <- x[!is.na(x)]
}
mx <- mean(x)
sdx <- sd(x,na.rm=na.rm)
kurt <- sum((x - mx)^4)/(length(x) * sd(x)^4) -3
} else {
kurt <- rep(NA,dim(x)[2])
mx <- colMeans(x,na.rm=na.rm)
sdx <- sd(x,na.rm=na.rm)
for (i in 1:dim(x)[2]) {
kurt[i] <- sum((x[,i] - mx[i])^4, na.rm = na.rm)/((length(x[,i]) - sum(is.na(x[,i]))) * sdx[i]^4) -3
}
}
return(kurt)
}
skew(x, na.rm = TRUE)
kurtosi(x, na.rm = TRUE)
#x A data.frame or matrix
#na.rm how to treat missing data
## The function is currently defined as
function (x, na.rm = TRUE)
{
if (length(dim(x)) == 0) {
if (na.rm) {
x <- x[!is.na(x)]
}
mx <- mean(x)
sdx <- sd(x,na.rm=na.rm)
kurt <- sum((x - mx)^4)/(length(x) * sd(x)^4) -3
} else {
kurt <- rep(NA,dim(x)[2])
mx <- colMeans(x,na.rm=na.rm)
sdx <- sd(x,na.rm=na.rm)
for (i in 1:dim(x)[2]) {
kurt[i] <- sum((x[,i] - mx[i])^4, na.rm = na.rm)/((length(x[,i]) - sum(is.na(x[,i]))) * sdx[i]^4) -3
}
}
return(kurt)
}
bfMainEffects = lmBF(len ~ supp + dose, data = ToothGrowth)
bfInteraction = lmBF(len ~ supp + dose + supp:dose, data = ToothGrowth)
## Compare the two models
bf = bfInteraction / bfMainEffects
bf
##
newbf = recompute(bf, iterations = 500000)
newbf
# posterior function
## Sample from the posterior of the full model
chains = posterior(bfInteraction, iterations = 10000)
## 1:13 are the only "interesting" parameters
summary(chains[,1:13])
#lot posterior of selected effects
plot(chains[,4:6])
bfMainEffects = lmBF(len ~ supp + dose, data = ToothGrowth)
bfInteraction = lmBF(len ~ supp + dose + supp:dose, data = ToothGrowth)
## Compare the two models
bf = bfInteraction / bfMainEffects
bf
##
newbf = recompute(bf, iterations = 500000)
newbf
# posterior function
## Sample from the posterior of the full model
chains = posterior(bfInteraction, iterations = 10000)
## 1:13 are the only "interesting" parameters
summary(chains[,1:13])
#lot posterior of selected effects
plot(chains[,4:6])
Comment
Reduce number of comparisons by specifyig the xact model of interest a priori.
# MODIFIED FROM BEST.R FOR ONE GROUP INSTEAD OF TWO.
# Version of Dec 02, 2015.
# John K. Kruschke
# johnkruschke@gmail.com
# http://www.indiana.edu/~kruschke/BEST/
#
# This program is believed to be free of errors, but it comes with no guarantee!
# The user bears all responsibility for interpreting the results.
# Please check the webpage above for updates or corrections.
#
### ***************************************************************
### ******** SEE FILE BEST1Gexample.R FOR INSTRUCTIONS ************
### ***************************************************************
source("openGraphSaveGraph.R") # graphics functions for Windows, Mac OS, Linux
BEST1Gmcmc = function( y , numSavedSteps=100000, thinSteps=1, showMCMC=FALSE) {
# This function generates an MCMC sample from the posterior distribution.
# Description of arguments:
# showMCMC is a flag for displaying diagnostic graphs of the chains.
# If F (the default), no chain graphs are displayed. If T, they are.
require(rjags)
#------------------------------------------------------------------------------
# THE MODEL.
modelString = "
model {
for ( i in 1:Ntotal ) {
y[i] ~ dt( mu , tau , nu )
}
mu ~ dnorm( muM , muP )
tau <- 1/pow( sigma , 2 )
sigma ~ dunif( sigmaLow , sigmaHigh )
nu ~ dexp(1/30)
}
" # close quote for modelString
# Write out modelString to a text file
writeLines( modelString , con="BESTmodel.txt" )
#------------------------------------------------------------------------------
# THE DATA.
# Load the data:
Ntotal = length(y)
# Specify the data in a list, for later shipment to JAGS:
dataList = list(
y = y ,
Ntotal = Ntotal ,
muM = mean(y) ,
muP = 0.000001 * 1/sd(y)^2 ,
sigmaLow = sd(y) / 1000 ,
sigmaHigh = sd(y) * 1000
)
#------------------------------------------------------------------------------
# INTIALIZE THE CHAINS.
# Initial values of MCMC chains based on data:
mu = mean(y)
sigma = sd(y)
# Regarding initial values in next line: (1) sigma will tend to be too big if
# the data have outliers, and (2) nu starts at 5 as a moderate value. These
# initial values keep the burn-in period moderate.
initsList = list( mu = mu , sigma = sigma , nu = 5 )
#------------------------------------------------------------------------------
# RUN THE CHAINS
parameters = c( "mu" , "sigma" , "nu" ) # The parameters to be monitored
adaptSteps = 500 # Number of steps to "tune" the samplers
burnInSteps = 1000
nChains = 3
nIter = ceiling( ( numSavedSteps * thinSteps ) / nChains )
# Create, initialize, and adapt the model:
jagsModel = jags.model( "BESTmodel.txt" , data=dataList , inits=initsList ,
n.chains=nChains , n.adapt=adaptSteps )
# Burn-in:
cat( "Burning in the MCMC chain...\n" )
update( jagsModel , n.iter=burnInSteps )
# The saved MCMC chain:
cat( "Sampling final MCMC chain...\n" )
codaSamples = coda.samples( jagsModel , variable.names=parameters ,
n.iter=nIter , thin=thinSteps )
# resulting codaSamples object has these indices:
# codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
#------------------------------------------------------------------------------
# EXAMINE THE RESULTS
if ( showMCMC ) {
openGraph(width=7,height=7)
autocorr.plot( codaSamples[[1]] , ask=FALSE )
show( gelman.diag( codaSamples ) )
effectiveChainLength = effectiveSize( codaSamples )
show( effectiveChainLength )
}
# Convert coda-object codaSamples to matrix object for easier handling.
# But note that this concatenates the different chains into one long chain.
# Result is mcmcChain[ stepIdx , paramIdx ]
mcmcChain = as.matrix( codaSamples )
return( mcmcChain )
} # end function BESTmcmc
#==============================================================================
BEST1Gplot = function( y , mcmcChain , compValm=0.0 , ROPEm=NULL ,
compValsd=NULL , ROPEsd=NULL ,
ROPEeff=NULL , showCurve=FALSE , pairsPlot=FALSE ) {
# This function plots the posterior distribution (and data).
# Description of arguments:
# y is the data vector.
# mcmcChain is a list of the type returned by function BEST1G.
# compValm is hypothesized mean for comparing with estimated mean.
# ROPEm is a two element vector, such as c(-1,1), specifying the limit
# of the ROPE on the mean.
# ROPEsd is a two element vector, such as c(14,16), specifying the limit
# of the ROPE on the difference of standard deviations.
# ROPEeff is a two element vector, such as c(-1,1), specifying the limit
# of the ROPE on the effect size.
# showCurve is TRUE or FALSE and indicates whether the posterior should
# be displayed as a histogram (by default) or by an approximate curve.
# pairsPlot is TRUE or FALSE and indicates whether scatterplots of pairs
# of parameters should be displayed.
mu = mcmcChain[,"mu"]
sigma = mcmcChain[,"sigma"]
nu = mcmcChain[,"nu"]
if ( pairsPlot ) {
# Plot the parameters pairwise, to see correlations:
openGraph(width=7,height=7)
nPtToPlot = 1000
plotIdx = floor(seq(1,length(mu),by=length(mu)/nPtToPlot))
panel.cor = function(x, y, digits=2, prefix="", cex.cor, ...) {
usr = par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y))
txt = format(c(r, 0.123456789), digits=digits)[1]
txt = paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex=1.25 ) # was cex=cex.cor*r
}
pairs( cbind( mu , sigma , log10(nu) )[plotIdx,] ,
labels=c( expression(mu) ,
expression(sigma) ,
expression(log10(nu)) ) ,
lower.panel=panel.cor , col="skyblue" )
}
# Define matrix for storing summary info:
summaryInfo = NULL
source("plotPost.R")
# Set up window and layout:
openGraph(width=6.0,height=5.0)
layout( matrix( c(3,3,4,4,5,5, 1,1,1,1,2,2) , nrow=6, ncol=2 , byrow=FALSE ) )
par( mar=c(3.5,3.5,2.5,0.5) , mgp=c(2.25,0.7,0) )
# Select thinned steps in chain for plotting of posterior predictive curves:
chainLength = NROW( mcmcChain )
nCurvesToPlot = 30
stepIdxVec = seq( 1 , chainLength , floor(chainLength/nCurvesToPlot) )
xRange = range( y )
xLim = c( xRange[1]-0.1*(xRange[2]-xRange[1]) ,
xRange[2]+0.1*(xRange[2]-xRange[1]) )
xVec = seq( xLim[1] , xLim[2] , length=200 )
maxY = max( dt( 0 , df=max(nu[stepIdxVec]) ) / min(sigma[stepIdxVec]) )
# Plot data y and smattering of posterior predictive curves:
stepIdx = 1
plot( xVec , dt( (xVec-mu[stepIdxVec[stepIdx]])/sigma[stepIdxVec[stepIdx]] ,
df=nu[stepIdxVec[stepIdx]] )/sigma[stepIdxVec[stepIdx]] ,
ylim=c(0,maxY) , cex.lab=1.75 ,
type="l" , col="skyblue" , lwd=1 , xlab="y" , ylab="p(y)" ,
main="Data w. Post. Pred." )
for ( stepIdx in 2:length(stepIdxVec) ) {
lines(xVec, dt( (xVec-mu[stepIdxVec[stepIdx]])/sigma[stepIdxVec[stepIdx]] ,
df=nu[stepIdxVec[stepIdx]] )/sigma[stepIdxVec[stepIdx]] ,
type="l" , col="skyblue" , lwd=1 )
}
histBinWd = median(sigma)/2
histCenter = mean(mu)
histBreaks = sort( c( seq( histCenter-histBinWd/2 , min(xVec)-histBinWd/2 ,
-histBinWd ),
seq( histCenter+histBinWd/2 , max(xVec)+histBinWd/2 ,
histBinWd ) , xLim ) )
histInfo = hist( y , plot=FALSE , breaks=histBreaks )
yPlotVec = histInfomids
xPlotVec[ yPlotVec==0.0 ] = NA
points( xPlotVec , yPlotVec , type="h" , lwd=3 , col="red" )
text( max(xVec) , maxY , bquote(N==.(length(y))) , adj=c(1.1,1.1) )
# Plot posterior distribution of parameter nu:
paramInfo = plotPost( log10(nu) , col="skyblue" , # breaks=30 ,
showCurve=showCurve ,
xlab=bquote("log10("*nu*")") , cex.lab = 1.75 , showMode=TRUE ,
main="Normality" ) # (<0.7 suggests kurtosis)
summaryInfo = rbind( summaryInfo , paramInfo )
rownames(summaryInfo)[NROW(summaryInfo)] = "log10(nu)"
# Plot posterior distribution of parameters mu1, mu2, and their difference:
xlim = range( c( mu , compValm ) )
paramInfo = plotPost( mu , xlim=xlim , cex.lab = 1.75 , ROPE=ROPEm ,
showCurve=showCurve , compVal = compValm ,
xlab=bquote(mu) , main=paste("Mean") ,
col="skyblue" )
summaryInfo = rbind( summaryInfo , paramInfo )
rownames(summaryInfo)[NROW(summaryInfo)] = "mu"
# Plot posterior distribution of sigma:
xlim=range( sigma )
paramInfo = plotPost( sigma , xlim=xlim , cex.lab = 1.75 , ROPE=ROPEsd ,
showCurve=showCurve , compVal=compValsd ,
xlab=bquote(sigma) , main=paste("Std. Dev.") ,
col="skyblue" , showMode=TRUE )
summaryInfo = rbind( summaryInfo , paramInfo )
rownames(summaryInfo)[NROW(summaryInfo)] = "sigma"
# Plot of estimated effect size:
effectSize = ( mu - compValm ) / sigma
paramInfo = plotPost( effectSize , compVal=0 , ROPE=ROPEeff ,
showCurve=showCurve ,
xlab=bquote( (mu-.(compValm)) / sigma ),
showMode=TRUE , cex.lab=1.75 , main="Effect Size" , col="skyblue" )
summaryInfo = rbind( summaryInfo , paramInfo )
rownames(summaryInfo)[NROW(summaryInfo)] = "effSz"
return( summaryInfo )
} # end of function BEST1Gplot
#==============================================================================
# MODIFIED FROM BEST.R FOR ONE GROUP INSTEAD OF TWO.
# Version of Dec 02, 2015.
# John K. Kruschke
# johnkruschke@gmail.com
# http://www.indiana.edu/~kruschke/BEST/
#
# This program is believed to be free of errors, but it comes with no guarantee!
# The user bears all responsibility for interpreting the results.
# Please check the webpage above for updates or corrections.
#
### ***************************************************************
### ******** SEE FILE BEST1Gexample.R FOR INSTRUCTIONS ************
### ***************************************************************
source("openGraphSaveGraph.R") # graphics functions for Windows, Mac OS, Linux
BEST1Gmcmc = function( y , numSavedSteps=100000, thinSteps=1, showMCMC=FALSE) {
# This function generates an MCMC sample from the posterior distribution.
# Description of arguments:
# showMCMC is a flag for displaying diagnostic graphs of the chains.
# If F (the default), no chain graphs are displayed. If T, they are.
require(rjags)
#------------------------------------------------------------------------------
# THE MODEL.
modelString = "
model {
for ( i in 1:Ntotal ) {
y[i] ~ dt( mu , tau , nu )
}
mu ~ dnorm( muM , muP )
tau <- 1/pow( sigma , 2 )
sigma ~ dunif( sigmaLow , sigmaHigh )
nu ~ dexp(1/30)
}
" # close quote for modelString
# Write out modelString to a text file
writeLines( modelString , con="BESTmodel.txt" )
#------------------------------------------------------------------------------
# THE DATA.
# Load the data:
Ntotal = length(y)
# Specify the data in a list, for later shipment to JAGS:
dataList = list(
y = y ,
Ntotal = Ntotal ,
muM = mean(y) ,
muP = 0.000001 * 1/sd(y)^2 ,
sigmaLow = sd(y) / 1000 ,
sigmaHigh = sd(y) * 1000
)
#------------------------------------------------------------------------------
# INTIALIZE THE CHAINS.
# Initial values of MCMC chains based on data:
mu = mean(y)
sigma = sd(y)
# Regarding initial values in next line: (1) sigma will tend to be too big if
# the data have outliers, and (2) nu starts at 5 as a moderate value. These
# initial values keep the burn-in period moderate.
initsList = list( mu = mu , sigma = sigma , nu = 5 )
#------------------------------------------------------------------------------
# RUN THE CHAINS
parameters = c( "mu" , "sigma" , "nu" ) # The parameters to be monitored
adaptSteps = 500 # Number of steps to "tune" the samplers
burnInSteps = 1000
nChains = 3
nIter = ceiling( ( numSavedSteps * thinSteps ) / nChains )
# Create, initialize, and adapt the model:
jagsModel = jags.model( "BESTmodel.txt" , data=dataList , inits=initsList ,
n.chains=nChains , n.adapt=adaptSteps )
# Burn-in:
cat( "Burning in the MCMC chain...\n" )
update( jagsModel , n.iter=burnInSteps )
# The saved MCMC chain:
cat( "Sampling final MCMC chain...\n" )
codaSamples = coda.samples( jagsModel , variable.names=parameters ,
n.iter=nIter , thin=thinSteps )
# resulting codaSamples object has these indices:
# codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
#------------------------------------------------------------------------------
# EXAMINE THE RESULTS
if ( showMCMC ) {
openGraph(width=7,height=7)
autocorr.plot( codaSamples[[1]] , ask=FALSE )
show( gelman.diag( codaSamples ) )
effectiveChainLength = effectiveSize( codaSamples )
show( effectiveChainLength )
}
# Convert coda-object codaSamples to matrix object for easier handling.
# But note that this concatenates the different chains into one long chain.
# Result is mcmcChain[ stepIdx , paramIdx ]
mcmcChain = as.matrix( codaSamples )
return( mcmcChain )
} # end function BESTmcmc
#==============================================================================
BEST1Gplot = function( y , mcmcChain , compValm=0.0 , ROPEm=NULL ,
compValsd=NULL , ROPEsd=NULL ,
ROPEeff=NULL , showCurve=FALSE , pairsPlot=FALSE ) {
# This function plots the posterior distribution (and data).
# Description of arguments:
# y is the data vector.
# mcmcChain is a list of the type returned by function BEST1G.
# compValm is hypothesized mean for comparing with estimated mean.
# ROPEm is a two element vector, such as c(-1,1), specifying the limit
# of the ROPE on the mean.
# ROPEsd is a two element vector, such as c(14,16), specifying the limit
# of the ROPE on the difference of standard deviations.
# ROPEeff is a two element vector, such as c(-1,1), specifying the limit
# of the ROPE on the effect size.
# showCurve is TRUE or FALSE and indicates whether the posterior should
# be displayed as a histogram (by default) or by an approximate curve.
# pairsPlot is TRUE or FALSE and indicates whether scatterplots of pairs
# of parameters should be displayed.
mu = mcmcChain[,"mu"]
sigma = mcmcChain[,"sigma"]
nu = mcmcChain[,"nu"]
if ( pairsPlot ) {
# Plot the parameters pairwise, to see correlations:
openGraph(width=7,height=7)
nPtToPlot = 1000
plotIdx = floor(seq(1,length(mu),by=length(mu)/nPtToPlot))
panel.cor = function(x, y, digits=2, prefix="", cex.cor, ...) {
usr = par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y))
txt = format(c(r, 0.123456789), digits=digits)[1]
txt = paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex=1.25 ) # was cex=cex.cor*r
}
pairs( cbind( mu , sigma , log10(nu) )[plotIdx,] ,
labels=c( expression(mu) ,
expression(sigma) ,
expression(log10(nu)) ) ,
lower.panel=panel.cor , col="skyblue" )
}
# Define matrix for storing summary info:
summaryInfo = NULL
source("plotPost.R")
# Set up window and layout:
openGraph(width=6.0,height=5.0)
layout( matrix( c(3,3,4,4,5,5, 1,1,1,1,2,2) , nrow=6, ncol=2 , byrow=FALSE ) )
par( mar=c(3.5,3.5,2.5,0.5) , mgp=c(2.25,0.7,0) )
# Select thinned steps in chain for plotting of posterior predictive curves:
chainLength = NROW( mcmcChain )
nCurvesToPlot = 30
stepIdxVec = seq( 1 , chainLength , floor(chainLength/nCurvesToPlot) )
xRange = range( y )
xLim = c( xRange[1]-0.1*(xRange[2]-xRange[1]) ,
xRange[2]+0.1*(xRange[2]-xRange[1]) )
xVec = seq( xLim[1] , xLim[2] , length=200 )
maxY = max( dt( 0 , df=max(nu[stepIdxVec]) ) / min(sigma[stepIdxVec]) )
# Plot data y and smattering of posterior predictive curves:
stepIdx = 1
plot( xVec , dt( (xVec-mu[stepIdxVec[stepIdx]])/sigma[stepIdxVec[stepIdx]] ,
df=nu[stepIdxVec[stepIdx]] )/sigma[stepIdxVec[stepIdx]] ,
ylim=c(0,maxY) , cex.lab=1.75 ,
type="l" , col="skyblue" , lwd=1 , xlab="y" , ylab="p(y)" ,
main="Data w. Post. Pred." )
for ( stepIdx in 2:length(stepIdxVec) ) {
lines(xVec, dt( (xVec-mu[stepIdxVec[stepIdx]])/sigma[stepIdxVec[stepIdx]] ,
df=nu[stepIdxVec[stepIdx]] )/sigma[stepIdxVec[stepIdx]] ,
type="l" , col="skyblue" , lwd=1 )
}
histBinWd = median(sigma)/2
histCenter = mean(mu)
histBreaks = sort( c( seq( histCenter-histBinWd/2 , min(xVec)-histBinWd/2 ,
-histBinWd ),
seq( histCenter+histBinWd/2 , max(xVec)+histBinWd/2 ,
histBinWd ) , xLim ) )
histInfo = hist( y , plot=FALSE , breaks=histBreaks )
yPlotVec = histInfo$density
yPlotVec[ yPlotVec==0.0 ] = NA
xPlotVec = histInfo$mids
xPlotVec[ yPlotVec==0.0 ] = NA
points( xPlotVec , yPlotVec , type="h" , lwd=3 , col="red" )
text( max(xVec) , maxY , bquote(N==.(length(y))) , adj=c(1.1,1.1) )
# Plot posterior distribution of parameter nu:
paramInfo = plotPost( log10(nu) , col="skyblue" , # breaks=30 ,
showCurve=showCurve ,
xlab=bquote("log10("*nu*")") , cex.lab = 1.75 , showMode=TRUE ,
main="Normality" ) # (<0.7 suggests kurtosis)
summaryInfo = rbind( summaryInfo , paramInfo )
rownames(summaryInfo)[NROW(summaryInfo)] = "log10(nu)"
# Plot posterior distribution of parameters mu1, mu2, and their difference:
xlim = range( c( mu , compValm ) )
paramInfo = plotPost( mu , xlim=xlim , cex.lab = 1.75 , ROPE=ROPEm ,
showCurve=showCurve , compVal = compValm ,
xlab=bquote(mu) , main=paste("Mean") ,
col="skyblue" )
summaryInfo = rbind( summaryInfo , paramInfo )
rownames(summaryInfo)[NROW(summaryInfo)] = "mu"
# Plot posterior distribution of sigma:
xlim=range( sigma )
paramInfo = plotPost( sigma , xlim=xlim , cex.lab = 1.75 , ROPE=ROPEsd ,
showCurve=showCurve , compVal=compValsd ,
xlab=bquote(sigma) , main=paste("Std. Dev.") ,
col="skyblue" , showMode=TRUE )
summaryInfo = rbind( summaryInfo , paramInfo )
rownames(summaryInfo)[NROW(summaryInfo)] = "sigma"
# Plot of estimated effect size:
effectSize = ( mu - compValm ) / sigma
paramInfo = plotPost( effectSize , compVal=0 , ROPE=ROPEeff ,
showCurve=showCurve ,
xlab=bquote( (mu-.(compValm)) / sigma ),
showMode=TRUE , cex.lab=1.75 , main="Effect Size" , col="skyblue" )
summaryInfo = rbind( summaryInfo , paramInfo )
rownames(summaryInfo)[NROW(summaryInfo)] = "effSz"
return( summaryInfo )
} # end of function BEST1Gplot
#==============================================================================
# MODIFIED 2015 DEC 02.
# John K. Kruschke
# johnkruschke@gmail.com
# http://www.indiana.edu/~kruschke/BEST/
#
# This program is believed to be free of errors, but it comes with no guarantee!
# The user bears all responsibility for interpreting the results.
# Please check the webpage above for updates or corrections.
if( HDIeff[2] - HDIeff[1]< maxHDIWeff ){ goalTally$HDIeff.wd.max = goalTally$HDIeff.wd.max + 1}for( i in1:length(goalTally)){ s1 = 1 + goalTally[[i]] s2 = 1 + ( nSim - goalTally[[i]]) power[[i]][1] = s1/(s1+s2) power[[i]][2:3] = HDIofICDF( qbeta , shape1=s1 , shape2=s2 )}cat("\nAfter ", nSim , " Simulated Experiments, Power Is: \n( mean, HDI_low, HDI_high )\n")for( i in1:length(power)){cat(names(power[i]) , round(power[[i]],3) , "\n")}save( nSim , power , file=saveName )}return( power )} # end of function BESTpower #============================================================================== makeData = function( mu1 , sd1 , mu2 , sd2 , nPerGrp , pcntOut=0 , sdOutMult=2.0 , rnd.seed=NULL ) { # Auxilliary function for generating random values from a # mixture of normal distibutions. if(!is.null(rnd.seed)){set.seed(rnd.seed)} # Set seed for random values. nOut = ceiling(nPerGrp*pcntOut/100) # Number of outliers. nIn = nPerGrp - nOut # Number from main distribution. if ( pcntOut > 100 | pcntOut < 0 ) { stop("pcntOut must be between 0 and 100.") } if ( pcntOut > 0 & pcntOut < 1 ) { warning("pcntOut is specified as percentage 0-100, not proportion 0-1.") } if ( pcntOut > 50 ) {
warning("pcntOut indicates more than 50% outliers; did you intend this?")
}
if( nOut <2& pcntOut >0){
stop("Combination of nPerGrp and pcntOut yields too few outliers.")
}
if( nIn <2){stop("Too few non-outliers.")} sdN = function( x ){sqrt(mean((x-mean(x))^2))} # genDat = function( mu , sigma , Nin , Nout , sigmaOutMult=sdOutMult ) { y = rnorm(n=Nin) # Random values in main distribution. y = ((y-mean(y))/sdN(y))*sigma + mu # Translate to exactly realize desired. yOut = NULL if ( Nout > 0 ) {
# Version of May 26, 2012. Re-checked on 2015 May 08.
# John K. Kruschke
# johnkruschke@gmail.com
# http://www.indiana.edu/~kruschke/BEST/
#
# This program is believed to be free of errors, but it comes with no guarantee!
# The user bears all responsibility for interpreting the results.
# Please check the webpage above for updates or corrections.
#
### ***************************************************************
### ******** SEE FILE BESTexample.R FOR INSTRUCTIONS **************
### ***************************************************************
# OPTIONAL: Clear R's memory and graphics:
rm(list=ls()) # Careful! This clears all of R's memory!
graphics.off() # This closes all of R's graphics windows.
# Get the functions loaded into R's working memory:
source("BEST.R")
#-------------------------------------------------------------------------------
# RETROSPECTIVE POWER ANALYSIS.
# !! This section assumes you have already run BESTexample.R !!
# Re-load the saved data and MCMC chain from the previously conducted
# Bayesian analysis. This re-loads the variables y1, y2, mcmcChain, etc.
load( "BESTexampleMCMC.Rdata" )
power = BESTpower( mcmcChain , N1=length(y1) , N2=length(y2) ,
ROPEm=c(-0.1,0.1) , ROPEsd=c(-0.1,0.1) , ROPEeff=c(-0.1,0.1) ,
maxHDIWm=2.0 , maxHDIWsd=2.0 , maxHDIWeff=0.2 , nRep=1000 ,
mcmcLength=10000 , saveName = "BESTexampleRetroPower.Rdata" )
#-------------------------------------------------------------------------------
# PROSPECTIVE POWER ANALYSIS, using fictitious strong data.
# Generate large fictitious data set that expresses hypothesis:
prospectData = makeData( mu1=108, sd1=17, mu2=100, sd2=15, nPerGrp=1000,
pcntOut=10, sdOutMult=2.0, rnd.seed=NULL )
y1pro = prospectDataYou can't use 'macro parameter character #' in math modey2 # Merely renames simulated data for convenience below.
# Generate Bayesian posterior distribution from fictitious data:
# (uses fewer than usual MCMC steps because it only needs nRep credible
# parameter combinations, not a high-resolution representation)
mcmcChainPro = BESTmcmc( y1pro , y2pro , numSavedSteps=2000 )
postInfoPro = BESTplot( y1pro , y2pro , mcmcChainPro , pairsPlot=TRUE )
save( y1pro, y2pro, mcmcChainPro, postInfoPro,
file="BESTexampleProPowerMCMC.Rdata" )
# Now compute the prospective power for planned sample sizes:
N1plan = N2plan = 50 # specify planned sample size
powerPro = BESTpower( mcmcChainPro , N1=N1plan , N2=N2plan , showFirstNrep=5 ,
ROPEm=c(-1.5,1.5) , ROPEsd=c(-0.0,0.0) , ROPEeff=c(-0.0,0.0) ,
maxHDIWm=15.0 , maxHDIWsd=10.0 , maxHDIWeff=1.0 , nRep=1000 ,
mcmcLength=10000 , saveName = "BESTexampleProPower.Rdata" )
#-------------------------------------------------------------------------------
# Version of May 26, 2012. Re-checked on 2015 May 08.
# John K. Kruschke
# johnkruschke@gmail.com
# http://www.indiana.edu/~kruschke/BEST/
#
# This program is believed to be free of errors, but it comes with no guarantee!
# The user bears all responsibility for interpreting the results.
# Please check the webpage above for updates or corrections.
#
### ***************************************************************
### ******** SEE FILE BESTexample.R FOR INSTRUCTIONS **************
### ***************************************************************
# OPTIONAL: Clear R's memory and graphics:
rm(list=ls()) # Careful! This clears all of R's memory!
graphics.off() # This closes all of R's graphics windows.
# Get the functions loaded into R's working memory:
source("BEST.R")
#-------------------------------------------------------------------------------
# RETROSPECTIVE POWER ANALYSIS.
# !! This section assumes you have already run BESTexample.R !!
# Re-load the saved data and MCMC chain from the previously conducted
# Bayesian analysis. This re-loads the variables y1, y2, mcmcChain, etc.
load( "BESTexampleMCMC.Rdata" )
power = BESTpower( mcmcChain , N1=length(y1) , N2=length(y2) ,
ROPEm=c(-0.1,0.1) , ROPEsd=c(-0.1,0.1) , ROPEeff=c(-0.1,0.1) ,
maxHDIWm=2.0 , maxHDIWsd=2.0 , maxHDIWeff=0.2 , nRep=1000 ,
mcmcLength=10000 , saveName = "BESTexampleRetroPower.Rdata" )
#-------------------------------------------------------------------------------
# PROSPECTIVE POWER ANALYSIS, using fictitious strong data.
# Generate large fictitious data set that expresses hypothesis:
prospectData = makeData( mu1=108, sd1=17, mu2=100, sd2=15, nPerGrp=1000,
pcntOut=10, sdOutMult=2.0, rnd.seed=NULL )
y1pro = prospectData$y1 # Merely renames simulated data for convenience below.
y2pro = prospectData$y2 # Merely renames simulated data for convenience below.
# Generate Bayesian posterior distribution from fictitious data:
# (uses fewer than usual MCMC steps because it only needs nRep credible
# parameter combinations, not a high-resolution representation)
mcmcChainPro = BESTmcmc( y1pro , y2pro , numSavedSteps=2000 )
postInfoPro = BESTplot( y1pro , y2pro , mcmcChainPro , pairsPlot=TRUE )
save( y1pro, y2pro, mcmcChainPro, postInfoPro,
file="BESTexampleProPowerMCMC.Rdata" )
# Now compute the prospective power for planned sample sizes:
N1plan = N2plan = 50 # specify planned sample size
powerPro = BESTpower( mcmcChainPro , N1=N1plan , N2=N2plan , showFirstNrep=5 ,
ROPEm=c(-1.5,1.5) , ROPEsd=c(-0.0,0.0) , ROPEeff=c(-0.0,0.0) ,
maxHDIWm=15.0 , maxHDIWsd=10.0 , maxHDIWeff=1.0 , nRep=1000 ,
mcmcLength=10000 , saveName = "BESTexampleProPower.Rdata" )
#-------------------------------------------------------------------------------
#monte carlo t test simulation
#install.packages("MonteCarlo")
library(MonteCarlo)
# Define function that generates data and applies the method of interest
ttest<-function(n,loc,scale){
# generate sample:
sample<-rnorm(n, loc, scale)
# calculate test statistic:
stat<-sqrt(n)*mean(sample)/sd(sample)
# get test decision:
decision<-abs(stat)>1.96
# return result:
return(list("decision"=decision))
}
# define parameter grid:
n_grid<-c(50,100,250,500)
loc_grid<-seq(0,1,0.2)
scale_grid<-c(1,2)
# collect parameter grids in list:
param_list=list("n"=n_grid, "loc"=loc_grid, "scale"=scale_grid)
# run simulation:
MC_result<-MonteCarlo(func=ttest, nrep=1000, param_list=param_list)
summary(MC_result)
#monte carlo t test simulation
#install.packages("MonteCarlo")
library(MonteCarlo)
# Define function that generates data and applies the method of interest
ttest<-function(n,loc,scale){
# generate sample:
sample<-rnorm(n, loc, scale)
# calculate test statistic:
stat<-sqrt(n)*mean(sample)/sd(sample)
# get test decision:
decision<-abs(stat)>1.96
# return result:
return(list("decision"=decision))
}
# define parameter grid:
n_grid<-c(50,100,250,500)
loc_grid<-seq(0,1,0.2)
scale_grid<-c(1,2)
# collect parameter grids in list:
param_list=list("n"=n_grid, "loc"=loc_grid, "scale"=scale_grid)
# run simulation:
MC_result<-MonteCarlo(func=ttest, nrep=1000, param_list=param_list)
summary(MC_result)
#install.packages("RWordPress", repos="http://www.omegahat.org/R", build=TRUE)
library(RWordPress)
options(WordPressLogin=c(user="password"),
WordPressURL="http://your_wp_installation.org/xmlrpc.php")
#[code lang='r']
#...
#[/code]
knit_hooksBracket argument to \\ must be a dimensionset(source=function(x, options) paste("\[code lang='r'\]\n", x, "\[/code\]\n", sep=""))
knit2wp <- function(file) {
require(XML)
content <- readLines(file)
content <- htmlTreeParse(content, trim=FALSE)
## WP will add the h1 header later based on the title, so delete here
contenthtmlbodyh1 <- NULL
content <- paste(capture.output(print(contenthtmlbody,
indent=FALSE, tagSeparator="")),
collapse="\n")
content <- gsub("<?.body>", "", content) # remove body tag
## enclose code snippets in SyntaxHighlighter format
content <- gsub("<?pre><code class=\"r\">", "\[code lang='r'\]\\n",
content)
content <- gsub("<?pre><code class=\"no-highlight\">", "\[code\]\\n",
content)
content <- gsub("<?pre><code>", "\[code\]\\n", content)
content <- gsub("<?/code></pre>", "\[/code\]\\n", content)
return(content)
}
newPost(content=list(description=knit2wp('rerWorkflow.html'),
title='Workflow: Post R markdown to WordPress',
categories=c('R')),
publish=FALSE)
postID <- 99 # post id returned by newPost()
editPost(postID,
content=list(description=knit2wp('rerWorkflow.html'),
title='Workflow: Post R markdown to WordPress',
categories=c('R')),
publish=FALSE)
#install.packages("RWordPress", repos="http://www.omegahat.org/R", build=TRUE)
library(RWordPress)
options(WordPressLogin=c(user="password"),
WordPressURL="http://your_wp_installation.org/xmlrpc.php")
#[code lang='r']
#...
#[/code]
knit_hooks$set(output=function(x, options) paste("\\[code\\]\n", x, "\\[/code\\]\n", sep=""))
knit_hooks$set(source=function(x, options) paste("\\[code lang='r'\\]\n", x, "\\[/code\\]\n", sep=""))
knit2wp <- function(file) {
require(XML)
content <- readLines(file)
content <- htmlTreeParse(content, trim=FALSE)
## WP will add the h1 header later based on the title, so delete here
content$children$html$children$body$children$h1 <- NULL
content <- paste(capture.output(print(content$children$html$children$body,
indent=FALSE, tagSeparator="")),
collapse="\n")
content <- gsub("<?.body>", "", content) # remove body tag
## enclose code snippets in SyntaxHighlighter format
content <- gsub("<?pre><code class=\"r\">", "\\[code lang='r'\\]\\\n",
content)
content <- gsub("<?pre><code class=\"no-highlight\">", "\\[code\\]\\\n",
content)
content <- gsub("<?pre><code>", "\\[code\\]\\\n", content)
content <- gsub("<?/code></pre>", "\\[/code\\]\\\n", content)
return(content)
}
newPost(content=list(description=knit2wp('rerWorkflow.html'),
title='Workflow: Post R markdown to WordPress',
categories=c('R')),
publish=FALSE)
postID <- 99 # post id returned by newPost()
editPost(postID,
content=list(description=knit2wp('rerWorkflow.html'),
title='Workflow: Post R markdown to WordPress',
categories=c('R')),
publish=FALSE)
Shiny Apps
Exploring the diagnosticity of the p-valueP-value distribution and power curves for an independent two-tailed t-testBIC approximation for ANOVA designsWhen does a significant p-value indicate a true effect?
Leave a Reply