Diferenças
Aqui você vê as diferenças entre duas revisões dessa página.
Ambos lados da revisão anteriorRevisão anteriorPróxima revisão | Revisão anterior | ||
artigos:ernesto3:sim [2008/09/26 08:41] – ernesto | artigos:ernesto3:sim [2008/10/13 13:00] (atual) – ernesto | ||
---|---|---|---|
Linha 1: | Linha 1: | ||
======Simulation study ====== | ======Simulation study ====== | ||
- | ===== Test beta bias ===== | + | ===== Algorithm |
<code R> | <code R> | ||
- | gs <- expand.grid((0:10)/10, (0:10)/10) | + | ################################################################################################# |
- | niter <- 100 | + | # |
- | arr <- array(NA, dim=c(niter, 2, 2), dimnames=list(niter=1:niter, stat=c(" | + | # PAPER COMPANION: SIMULATION STUDY |
+ | # | ||
+ | # Paulo Ribeiro Jr. < | ||
+ | # Date: 20081013 | ||
+ | # Objective: Test paper methodology against design-based estimators regarding | ||
+ | # association between age compositions and the age aggregated abundance. | ||
+ | # Tree options are simulated: | ||
+ | # (iii) strong association. | ||
+ | # Note: The association is induced by considering correlated multivariate gaussian | ||
+ | # | ||
+ | # | ||
+ | # | ||
+ | ################################################################################################# | ||
- | # gau | + | # R libraries |
- | set.seed(111) | + | library(geoR) |
- | gd <- grf(grid=gs, cov.pars=c(1,0.2), mean=2, nsim=niter) | + | library(lattice) |
+ | library(MASS) | ||
+ | library(RandomFields) | ||
+ | library(compositions) | ||
- | for(i in 1:niter){ | + | # |
- | cat(i) | + | # SIMULATING THE PROCESSES |
- | ggdd <- gd | + | # |
- | ggdd$data <- gd$data[,i] | + | # grid |
- | lf <- likfit(ggdd, | + | gs <- expand.grid((0:40)/40, (0:40)/40) |
- | arr[i,, | + | # size of processes |
- | } | + | n <- nrow(gs) |
+ | # number of replicates | ||
+ | nsim <- 250 | ||
+ | |||
+ | # | ||
+ | # STEP 1 : gaussian processes to build abundance and compositions | ||
+ | # | ||
- | # gau independente | + | # object |
+ | arr0 <- array(NA, dim=c(n, | ||
+ | # variance-covariance matrix | ||
+ | s2 <- 0.5 | ||
+ | Sig <- diag(c(1, | ||
+ | Sig[1,4] <- 0.45; Sig[4,1] <- 0.45; Sig[1,6] <- 0.9; Sig[6,1] <- 0.9; Sig[6,4] <- 0.5; Sig[4,6] <- 0.5 | ||
+ | Sig <- Sig*s2 | ||
+ | # simulation | ||
set.seed(111) | set.seed(111) | ||
- | gd <- grf(grid=gs, | + | for(i in 1:nsim){ |
- | + | arr0[,,i] <- mvrnorm(nrow(gs), c(1,0,0,0,0,0,0), Sig) | |
- | for(i in 1:niter){ | + | |
- | cat(i) | + | |
- | ggdd <- gd | + | |
- | ggdd$data <- gd$data[,i] | + | |
- | lf <- likfit(ggdd, ini.cov.pars=c(0.5,0.5), messages=F) | + | |
- | arr[i,,2] <- c(lf$beta, lf$beta.var) | + | |
} | } | ||
+ | |||
+ | # | ||
+ | # STEP 2: generate 250 replicates of a log-gaussian spatial process (Diggle and Ribeiro Jr, 2007) | ||
+ | # | ||
+ | # | ||
- | pdf(" | + | # Generate a spatial Gaussian process Z |
- | par(mfrow=c(2,2)) | + | phi <- 0.2 |
- | hist(arr[,1,1], main="beta for mu=2 s2=1 phi=0.2 t2=0") | + | sigmasq <- 0.5 |
- | hist(arr[,1,2], main=" | + | set.seed(222) |
- | hist(arr[,2,1], main=" | + | Z <- grf(grid=gs, cov.pars=c(sigmasq, phi), nsim=nsim) |
- | hist(arr[,2,2], main=" | + | # build a log gausian process Y |
- | dev.off() | + | Ysim <- Z |
- | </code> | + | Ysim$data <- exp(Z$data+arr0[,1,]) |
+ | # Y characteristics | ||
+ | Ysim.lmean <- apply(log(Ysim$data), 2, mean) | ||
+ | Ysim.lvar <- apply(log(Ysim$data), 2, var) | ||
+ | Ysim.lnhat <- exp(Ysim.lmean+Ysim.lvar/2) | ||
- | < | + | # |
- | O problema está na variância do beta que não é um estimador da variância do processo. Logo quando fazemos a backtrans há uma subestimação da média do processo. | + | # STEP 3: build compositions |
- | </ | + | # |
- | ===== Algorítmo ===== | + | # objects |
- | <code R> | + | arr00 <- array(1, dim=c(n, |
- | ## Modelo: variáveis com mu dependente e correlação componente espacial parcialmente comum | + | Psim <- array(NA, dim=c(n, |
- | ## Y1 = mu1 + S00 + S11 + e1 = mu1 + sig00 R + sig11 R11 + e1 | + | # option 1: no association between compositions and age aggregated abundance |
- | ## Y2 = mu2 + S00 + S22 + e2 = mu1 + sig00 R + sig22 R22 + e2 | + | arr00[,1:2,] <- exp(arr0[, |
- | ## Y3 = mu3 + S00 + S33 + e3 = mu1 + sig00 R + sig33 R33 + e3 | + | Psim[,,,1] <- aperm(apply(arr00, |
- | ## | + | # option 2: week association between compositions and age aggregated abundance |
- | ## sig00 + sig## = 1; e# = 1 | + | arr00[,1:2,] <- exp(arr0[, |
- | ## mu ~ MVG(c(mu1, mu2, mu3), Sigma) | + | Psim[,,,2] <- aperm(apply(arr00, |
- | ## mu1 = mu2 = mu3 = 1 | + | # option 3: strong association between compositions and age aggregated abundance |
- | ## diag(Sigma) = e# | + | arr00[, |
- | </code> | + | Psim[,,,3] <- aperm(apply(arr00, |
+ | # P characteristics | ||
- | **i indexa localizações | + | # |
- | j indexa idades | + | # STEP 4: build abundance at age |
- | ** | + | # |
- | * Definir grid | + | # objects |
- | * Definir sig00 (usado para gerir associação espacial entre idades) | + | Isim.ln <- array(NA, dim=c(n, |
- | * Simular Ij gaussiana | + | # sim |
- | * set sig00 e phi00 = sig00/5 | + | for(i in 1:3){ |
- | * set sig## = 1-sig00 e phi## = sig##/5 | + | for(j in 1:3){ |
- | * set mu## = 0.4 | + | Isim.ln[,i,,j] <- Psim[,i,,j]*Ysim$data |
- | * sim S ~ MVN(0,Sig) Sig=f(sig, phi) | + | } |
- | * sim mu ~ MVN({mu1,mu2,m3}, Sigmu) Sigmu = {indep, dep} | + | } |
- | * set Ij = muj + S00 + Sjj | + | # I characteristics |
- | * Construir Y = prod(exp{Ij}) | + | Isim.lnmean <- apply(log(Isim.ln), c(2,3,4), mean) |
- | * Construir Pj = exp{Ij}/ | + | Isim.lnvar <- apply(log(Isim.ln), c(2,3,4), var) |
- | * Construir Ij = Y*Pj para garantir que Y = sum_j(Ij) | + | Isim.lnhat <- exp(Isim.lnmean+Isim.lnvar/2) |
- | * Aleatorizar vector de localizações | + | |
- | * Construir amostras de Y, P e I | + | |
- | * Ajustar modelo geo | + | |
- | * Estimar Y | + | |
- | * Yhat = exp{beta+beta.var/2} | + | |
- | * Ybar = sum_i(Yi)/n | + | |
- | * Estimar Ij | + | |
- | * Ihatj = Yhat * 1/n sum_i(Pij) sendo Pij = Iij/sum_j(Iij) | + | |
- | * Ibarj = sum_i(Iij)/n | + | |
- | <code R> | + | # |
- | ## INI | + | # ESTIMATION |
- | # libraries | + | #============================================================================================== |
- | library(geoR) | + | # number of samples to be drawn from each of the 250 replicate |
- | library(lattice) | + | ns <- 2 |
- | library(MASS) | + | |
- | library(RandomFields) | + | |
- | source(" | + | |
- | # definindo grid de simulação e outros parametros da sim | + | #---------------------------------------------------------------------------------------------- |
- | gs <- expand.grid((0:40)/40, (0:40)/40) | + | # STEP 5: estimation with methodology proposed by the paper |
- | n <- nrow(gs) | + | # |
- | niter <- 100 | + | |
- | spcor <- seq(0, | + | |
- | # objectos | + | # objects |
- | Isim <- array(NA, dim=c(n,3,niter,5,2), dimnames=list(loc=1:n, age=1:3, niter=1: | + | Yres <- array(NA, dim=c(ns,4,nsim), dimnames=list(samp=1:ns, stat=c("Ybar", "Ybarvar", |
- | Isim.ln | + | Ihat <- array(NA, dim=c(ns,3,nsim,3), dimnames=list(samp=1:ns, age=1: |
- | Psim <- array(NA, dim=c(n,3,niter,5,2), dimnames=list(loc=1:n, age=1:3, niter=1:niter, spcor=spcor, mucor=c("indep", "dep"))) | + | xd <- expand.grid(dimnames(Ihat)[-c(1,2)]) |
- | Ysim <- array(NA, dim=c(n,1,niter,5,2), dimnames=list(loc=1:n, age=" | + | # estimation |
- | Yres <- array(NA, dim=c(2, | + | set.seed(222) |
- | Isim.lnhat | + | for(j in 1:ns){ |
- | Ibar <- array(NA, dim=c(3, | + | cat("\n",j ,":", date(),"-", |
- | Ihat <- array(NA, dim=c(3,niter,5,2), dimnames=list(age=1:3, niter=1:niter, spcor=spcor, | + | samp <- sample(1:n, 100) |
+ | for(i in 1:nrow(xd)){ | ||
+ | x <- xd[i,] | ||
+ | cat(".", | ||
+ | # building the sample | ||
+ | Isamp <- Isim.ln[samp,,x[[1]],x[[2]]] | ||
+ | locsamp <- gs[samp,] | ||
+ | Ysamp <- apply(Isamp,1,sum) | ||
+ | # estimation of age aggregated abundance | ||
+ | lf <- likfit(as.geodata(cbind(locsamp, Ysamp)), lambda=0, ini.cov.pars=c(1,0.2), messages=FALSE) | ||
+ | Yhat <- exp(lf$beta+lf$tausq/ | ||
+ | Yhatvar | ||
+ | Yres[j,c("Yhat", "Yhatvar"), | ||
+ | # estimation of abundance-at-age | ||
+ | prop <- acomp(Isamp) | ||
+ | Ihat[j,,x[[1]],x[[2]]] <- Yhat*c(mean(prop)) | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # | ||
+ | # STEP 6: estimation with design-based estimators | ||
+ | # | ||
- | ## SIM | + | # objects |
- | for(i in 1:length(spcor)){ | + | Ibar <- array(NA, dim=c(ns, |
- | Isim[,,,i,] <- [[artigos: | + | # estimation |
+ | set.seed(222) | ||
+ | for(j in 1:ns){ | ||
+ | cat(" | ||
+ | samp <- sample(1:n, 100) | ||
+ | for(i in 1:nrow(xd)){ | ||
+ | x <- xd[i,] | ||
+ | cat(" | ||
+ | # building the sample | ||
+ | Isamp <- Isim.ln[samp,,x[[1]],x[[2]]] | ||
+ | Ysamp | ||
+ | # store means | ||
+ | Yres[j, | ||
+ | # estimation of abundance-at-age | ||
+ | Ibar[j,,x[[1]],x[[2]]] <- apply(Isamp,2,mean) | ||
+ | } | ||
} | } | ||
- | Isim.mean <- apply(Isim, c(2,3,4,5), mean) | + | # |
- | Isim.var <- apply(Isim, c(2,3,4,5), var) | + | # RESULTS |
+ | # | ||
- | # Ysim como produto de lognormais | + | #---------------------------------------------------------------------------------------------- |
- | Ysim[,1,,,] <- apply(exp(Isim), | + | # STEP 7: summary statistics |
- | # cractarerísticas de Y | + | # |
- | Ysim.smean <- apply(Ysim, c(3,4,5), mean) | + | |
- | Ysim.svar <- apply(Ysim, c(3,4,5), var) | + | |
- | Ysim.lmean <- apply(log(Ysim), | + | |
- | Ysim.lvar <- apply(log(Ysim), | + | |
- | # composições | + | |
- | Psim[] <- aperm(apply(exp(Isim), | + | |
- | # observações das marginais | + | |
- | Isim.ln[, | + | |
- | Isim.ln[, | + | |
- | Isim.ln[, | + | |
- | Isim.lnmean <- apply(log(Isim.ln), | + | |
- | Isim.lnvar <- apply(log(Isim.ln), | + | |
- | Isim.lnhat <- exp(Isim.lnmean+Isim.lnvar/ | + | |
- | ## running our model | + | # objects |
- | xd <- expand.grid(dimnames(Ihat)[-1]) | + | sim.res |
- | samp <- sample(1:n, 100) | + | # computation |
+ | for(i in 1:nsim){ | ||
+ | lmusim <- Isim.lnhat[,i,] | ||
+ | # geo | ||
+ | m <- Ihat[,,i,] | ||
+ | q025 <- apply(m, | ||
+ | q975 <- apply(m,c(2,3), quantile, probs=0.975, | ||
+ | sim.res[i,,," | ||
+ | for(j in 1:ns){ | ||
+ | m[j,,] <- m[j,, | ||
+ | } | ||
+ | sim.res[i,,," | ||
+ | sim.res[i,,," | ||
- | for(i in 1: | + | # samp |
- | x <- xd[i,] | + | m <- Ibar[,,i,] |
- | cat(as.character(unlist(x))," | + | q025 <- apply(m,c(2,3), quantile, probs=0.025, na.rm=T) |
- | Isamp | + | q975 <- apply(m,c(2,3), quantile, probs=0.975, na.rm=T) |
- | locsamp <- gs[samp,] | + | sim.res[i,,," |
- | Ysamp <- Ysim[samp,,x[[1]], | + | for(j in 1:ns){ |
- | # geodata | + | m[j,,] <- m[j,,]-lmusim |
- | gd <- as.geodata(cbind(locsamp, Ysamp)) | + | } |
- | lf <- likfit(gd, lambda=0, ini.cov.pars=c(1,0.2), messages=FALSE) | + | sim.res[i,,," |
- | Yhat <- exp(lf$beta+lf$beta.var/2) | + | sim.res[i,,," |
- | # store means | + | |
- | Yres[,x[[1]],x[[2]],x[[3]]] <- c(mean(Ysamp), | + | |
- | # compositions | + | |
- | prop <- apply(Isamp,1, | + | |
- | prop[is.na(prop)]<-0 | + | |
- | Ihat[,x[[1]],x[[2]],x[[3]]] <- Yhat*apply(prop,1,mean) | + | |
- | Ibar[,x[[1]],x[[2]],x[[3]]] <- apply(Isamp,2,mean) | + | |
} | } | ||
+ | # table | ||
+ | tab.res <- apply(sim.res, | ||
- | Ihat.bias <- apply(Ihat - exp(1.2+log(0.33)), | + | ################################################################################################# |
- | Ihat.mse <- apply(Ihat - exp(1.2+log(0.33)+0.7/2), c(1,3,4), var) | + | # THE END (or the beginning |
- | Ibar.bias <- apply(Ibar - exp(1.2+log(0.33)+0.7/ | + | ################################################################################################# |
- | Ibar.mse <- apply(Ibar - exp(1.2+log(0.33)+0.7/ | + | |
</ | </ | ||
+ | ===== Results ===== | ||
- | |||
- | |||