Diferenças

Aqui você vê as diferenças entre duas revisões dessa página.

--- artigos:ernesto3:sim [2008/09/26 08:46] – ernesto
+++ artigos:ernesto3:sim [2008/10/13 13:00] (atual) – ernesto
@@ Linha 1: / Linha 1: @@
 ======Simulation study ======
-===== Test beta bias =====
+===== Algorithm =====
 <code R>
-gs <- expand.grid((0:10)/10, (0:10)/10)
+#################################################################################################
-niter <- 100
+#
-arr <- array(NA, dim=c(niter, 2, 2), dimnames=list(niter=1:niter, stat=c("beta","beta.var"), dep=c(1,0)))
+#	PAPER COMPANION: SIMULATION STUDY
+#	Authors:	Ernesto Jardim <ernesto@ipimar.pt>
+#			Paulo Ribeiro Jr. <paulojus@ufpr.br>
+#	Date: 20081013
+#	Objective: 	Test paper methodology against design-based estimators regarding
+# 			association between age compositions and the age aggregated abundance.
+#			Tree options are simulated: (i) no association, (ii) week association,
+#			(iii) strong association.
+#	Note:		The association is induced by considering correlated multivariate gaussian
+#			processes as the basis for building the age aggregated abundance and age
+#			compositions.
+#
+#################################################################################################
-# gau
+# R libraries
-set.seed(111)
-gd <- grf(grid=gs, cov.pars=c(1,0.2), mean=2, nsim=niter)
-for(i in 1:niter){
-cat(i)
-	ggdd <- gd
-	ggdd$data <- gd$data[,i]
-	lf <- likfit(ggdd, ini.cov.pars=c(1,0.2), messages=FALSE)
-	arr[i,,1] <- c(lf$beta, lf$beta.var)
-}
-# gau independente
-set.seed(111)
-gd <- grf(grid=gs, cov.pars=c(0,0), mean=2, nsim=niter, nugget=1)
-for(i in 1:niter){
-cat(i)
-	ggdd <- gd
-	ggdd$data <- gd$data[,i]
-	lf <- likfit(ggdd, ini.cov.pars=c(0.5,0.5), messages=F)
-	arr[i,,2] <- c(lf$beta, lf$beta.var)
-}
-pdf("betabias.pdf")
-par(mfrow=c(2,2))
-hist(arr[,1,1], main="beta for mu=2 s2=1 phi=0.2 t2=0")
-hist(arr[,1,2], main="beta for mu=2 s2=0 phi=0 t2=1")
-hist(arr[,2,1], main="beta.var")
-hist(arr[,2,2], main="beta.var")
-dev.off()
-</code>
-<note>
-O problema está na variância do beta que não é um estimador da variância do processo. Logo quando fazemos a backtrans há uma subestimação da média do processo.
-</note>
-===== Algorítmo =====
-<code R>
-## Modelo: variáveis com mu dependente e correlação componente espacial parcialmente comum
-##	Y1 = mu1 + S00 + S11 + e1 = mu1 + sig00 R + sig11 R11 + e1
-##	Y2 = mu2 + S00 + S22 + e2 = mu1 + sig00 R + sig22 R22 + e2
-##	Y3 = mu3 + S00 + S33 + e3 = mu1 + sig00 R + sig33 R33 + e3
-##	Constraint by:
-##		sig00 + sig## = 1; e# = 1
-##		mu ~ MVG(c(mu1, mu2, mu3), Sigma)
-##		mu1 = mu2 = mu3 = 1
-##		diag(Sigma) = e#
-</code>
-**i indexa localizações
-j indexa idades
-**
-  * Definir grid
-  * Definir sig00 (usado para gerir associação espacial entre idades)
-  * Simular Ij gaussiana
-     * set sig00 e phi00 = sig00/5
-     * set sig## = 1-sig00 e phi## = sig##/5
-     * set mu## = 0.4
-     * sim S ~ MVN(0,Sig) Sig=f(sig, phi)
-     * sim mu ~ MVN({mu1,mu2,m3}, Sigmu) Sigmu = {indep, dep}
-     * set Ij = muj + S00 + Sjj
-  * Construir Y = prod(exp{Ij})
-  * Construir Pj = exp{Ij}/sum_j(exp{Ij})
-  * Construir Ij = Y*Pj para garantir que Y = sum_j(Ij)
-  * Aleatorizar vector de localizações (100)
-  * Construir amostras de Y, P e I
-  * Ajustar modelo geo
-  * Estimar Y
-    * Yhat = exp{beta+beta.var/2}
-    * Ybar = sum_i(Yi)/n
-  * Estimar Ij
-    * Ihatj = Yhat * 1/n sum_i(Pij) sendo Pij = Iij/sum_j(Iij)
-    * Ibarj = sum_i(Iij)/n
-<code R>
-## INI
-# libraries
 library(geoR)
 library(lattice)
 library(MASS)
 library(RandomFields)
-source("funs.R")
+library(compositions)
-# definindo grid de simulação e outros parametros da sim
+#==============================================================================================
+# SIMULATING THE PROCESSES
+#==============================================================================================
+# grid
 gs <- expand.grid((0:40)/40, (0:40)/40)
+# size of processes
 n <- nrow(gs)
-niter <- 100
+# number of replicates
-spcor <- seq(0,1,len=5)
+nsim <- 250
-# objectos
+#----------------------------------------------------------------------------------------------
-Isim <- array(NA, dim=c(n,3,niter,5,2), dimnames=list(loc=1:n, age=1:3, niter=1:niter, spcor=spcor, mucor=c("indep", "dep")))
+# STEP 1 : gaussian processes to build abundance and compositions
-Isim.ln <- array(NA, dim=c(n,3,niter,5,2), dimnames=list(loc=1:n, age=1:3, niter=1:niter, spcor=spcor, mucor=c("indep", "dep")))
+#----------------------------------------------------------------------------------------------
-Psim <- array(NA, dim=c(n,3,niter,5,2), dimnames=list(loc=1:n, age=1:3, niter=1:niter, spcor=spcor, mucor=c("indep", "dep")))
-Ysim <- array(NA, dim=c(n,1,niter,5,2), dimnames=list(loc=1:n, age="all", niter=1:niter, spcor=spcor, mucor=c("indep", "dep")))
-Yres <- array(NA, dim=c(2,niter,5,2), dimnames=list(stat=c("Ybar","Yhat"), niter=1:niter, spcor=spcor, mucor=c("indep", "dep")))
-Isim.lnhat <- array(NA, dim=c(3,niter,5,2), dimnames=list(age=1:3, niter=1:niter, spcor=spcor, mucor=c("indep", "dep")))
-Ibar <- array(NA, dim=c(3,niter,5,2), dimnames=list(age=1:3, niter=1:niter, spcor=spcor, mucor=c("indep", "dep")))
-Ihat <- array(NA, dim=c(3,niter,5,2), dimnames=list(age=1:3, niter=1:niter, spcor=spcor, mucor=c("indep", "dep")))
-## SIM (função isim abaixo)
+# object
-for(i in 1:length(spcor)){
+arr0 <- array(NA, dim=c(n,7,nsim), dimnames=list(loc=1:n, age=c("all","1i","2i","1w","2w","1s","2s"), nsim=1:nsim))
-	Isim[,,,i,] <- isim(gs, spcor[i], niter, 0.2)
+# variance-covariance matrix
+s2 <- 0.5
+Sig <- diag(c(1,1,1,1,1,1,1))
+Sig[1,4] <- 0.45; Sig[4,1] <- 0.45; Sig[1,6] <- 0.9; Sig[6,1] <- 0.9; Sig[6,4] <- 0.5; Sig[4,6] <- 0.5
+Sig <- Sig*s2
+# simulation
+set.seed(111)
+for(i in 1:nsim){
+	arr0[,,i] <- mvrnorm(nrow(gs), c(1,0,0,0,0,0,0), Sig)
 }
+#----------------------------------------------------------------------------------------------
+# STEP 2: generate 250 replicates of a log-gaussian spatial process (Diggle and Ribeiro Jr, 2007)
+#         with  mu=1, phi=0.2, sigma2=0.5, tau2=0.5
+#----------------------------------------------------------------------------------------------
-Isim.mean <- apply(Isim, c(2,3,4,5), mean)
+# Generate a spatial Gaussian process Z
-Isim.var <- apply(Isim, c(2,3,4,5), var)
+phi <- 0.2
+sigmasq <- 0.5
+set.seed(222)
+Z <- grf(grid=gs, cov.pars=c(sigmasq, phi), nsim=nsim)
+# build a log gausian process Y
+Ysim <- Z
+Ysim$data <- exp(Z$data+arr0[,1,])
+# Y characteristics
+Ysim.lmean <- apply(log(Ysim$data), 2, mean)
+Ysim.lvar <- apply(log(Ysim$data), 2, var)
+Ysim.lnhat <- exp(Ysim.lmean+Ysim.lvar/2)
-# Ysim como produto de lognormais
+#----------------------------------------------------------------------------------------------
-Ysim[,1,,,] <- apply(exp(Isim), c(1,3,4,5), prod)
+# STEP 3: build compositions
-# cractarerísticas de Y
+#----------------------------------------------------------------------------------------------
-Ysim.smean <- apply(Ysim, c(3,4,5), mean)
-Ysim.svar <- apply(Ysim, c(3,4,5), var)
-Ysim.lmean <- apply(log(Ysim), c(3,4,5), mean)
-Ysim.lvar <- apply(log(Ysim), c(3,4,5), var)
-# composições
-Psim[] <- aperm(apply(exp(Isim), c(1,3,4,5), function(x) x/sum(x)), c(2,1,3,4,5))
-# observações das marginais
-Isim.ln[,1,,,] <- Ysim*Psim[,1,,,,drop=FALSE]
-Isim.ln[,2,,,] <- Ysim*Psim[,2,,,,drop=FALSE]
-Isim.ln[,3,,,] <- Ysim*Psim[,3,,,,drop=FALSE]
-Isim.lnmean <- apply(log(Isim.ln), c(2,3,4,5), mean)
-Isim.lnvar <- apply(log(Isim.ln), c(2,3,4,5), var)
-Isim.lnhat <- exp(Isim.lnmean+Isim.lnvar/2)
-## running our model
+# objects
-xd <- expand.grid(dimnames(Ihat)[-1])
+arr00 <- array(1, dim=c(n,3,nsim), dimnames=list(loc=1:n, age=1:3, nsim=1:nsim))
-samp <- sample(1:n, 100)
+Psim <- array(NA, dim=c(n,3,nsim,3), dimnames=list(loc=1:n, age=1:3, nsim=1:nsim, mucor=c("indep", "dep045","dep090")))
+# option 1: no association between compositions and age aggregated abundance
+arr00[,1:2,] <- exp(arr0[,2:3,])
+Psim[,,,1] <- aperm(apply(arr00,c(1,3),function(x) x/sum(x)),c(2,1,3))
+# option 2: week association between compositions and age aggregated abundance
+arr00[,1:2,] <- exp(arr0[,4:5,])
+Psim[,,,2] <- aperm(apply(arr00,c(1,3),function(x) x/sum(x)),c(2,1,3))
+# option 3: strong association between compositions and age aggregated abundance
+arr00[,1:2,] <- exp(arr0[,6:7,])
+Psim[,,,3] <- aperm(apply(arr00,c(1,3),function(x) x/sum(x)),c(2,1,3))
+# P characteristics
-for(i in 1:nrow(xd)){
+#----------------------------------------------------------------------------------------------
-	x <- xd[i,]
+# STEP 4: build abundance at age
-	cat(as.character(unlist(x)),"\n")
+#----------------------------------------------------------------------------------------------
-	Isamp <- Isim.ln[samp,,x[[1]],x[[2]],x[[3]]]
-	locsamp <- gs[samp,]
+# objects
-	Ysamp <- Ysim[samp,,x[[1]],x[[2]],x[[3]]]
+Isim.ln <- array(NA, dim=c(n,3,nsim,3), dimnames=list(loc=1:n, age=1:3, nsim=1:nsim, mucor=c("indep", "dep045","dep090")))
-	# geodata
+# sim
-	gd <- as.geodata(cbind(locsamp, Ysamp))
+for(i in 1:3){
-	lf <- likfit(gd, lambda=0, ini.cov.pars=c(1,0.2), messages=FALSE)
+	for(j in 1:3){
-	Yhat <- exp(lf$beta+lf$beta.var/2)
+		Isim.ln[,i,,j] <- Psim[,i,,j]*Ysim$data
-	# store means
+	}
-	Yres[,x[[1]],x[[2]],x[[3]]] <- c(mean(Ysamp), Yhat)
-	# compositions
-	prop <- apply(Isamp,1,function(x) x/sum(x))
-	prop[is.na(prop)]<-0
-	Ihat[,x[[1]],x[[2]],x[[3]]] <- Yhat*apply(prop,1,mean)
-	Ibar[,x[[1]],x[[2]],x[[3]]] <- apply(Isamp,2,mean)
 }
-</code>
+# I characteristics
+Isim.lnmean <- apply(log(Isim.ln), c(2,3,4), mean)
+Isim.lnvar <- apply(log(Isim.ln), c(2,3,4), var)
+Isim.lnhat <- exp(Isim.lnmean+Isim.lnvar/2)
-<code R>
+#==============================================================================================
-isim <- function(gs, sig00, niter, tsq00, seed=333){
+# ESTIMATION
-	set.seed(seed)
+#==============================================================================================
-	arr <- array(NA, dim=c(n=nrow(gs),3,niter,2), dimnames=list(loc=1:n, age=1:3, niter=1:niter, mucor=c("indep", "dep")))
+# number of samples to be drawn from each of the 250 replicate
-	m.arr <- array(NA, dim=c(n=nrow(gs),3,niter), dimnames=list(loc=1:n, age=1:3, niter=1:niter))
+ns <- 2
-	# parâmetros do componente comum (S00)
+#----------------------------------------------------------------------------------------------
-	phi00 <- sig00/5
+# STEP 5: estimation with methodology proposed by the paper
+#----------------------------------------------------------------------------------------------
-	# parâmetros dos componentes individuais
+# objects
-	sig11 <- sig22 <- sig33 <- 1-sig00
+Yres <- array(NA, dim=c(ns,4,nsim), dimnames=list(samp=1:ns, stat=c("Ybar", "Ybarvar", "Yhat", "Yhatvar"), nsim=1:nsim))
-	phi11 <- phi22 <- phi33 <- (1-sig00)/5
+Ihat <- array(NA, dim=c(ns,3,nsim,3), dimnames=list(samp=1:ns, age=1:3, nsim=1:nsim, mucor=c("indep", "dep045","dep090")))
-	mu1 <- mu2 <- mu3 <- 0.4
+xd <- expand.grid(dimnames(Ihat)[-c(1,2)])
+# estimation
+set.seed(222)
+for(j in 1:ns){
+cat("\n",j ,":", date(),"-", sep="")
+	samp <- sample(1:n, 100)
+	for(i in 1:nrow(xd)){
+		x <- xd[i,]
+		cat(".", sep="")
+		# building the sample
+		Isamp <- Isim.ln[samp,,x[[1]],x[[2]]]
+		locsamp <- gs[samp,]
+		Ysamp <- apply(Isamp,1,sum)
+		# estimation of age aggregated abundance
+		lf <- likfit(as.geodata(cbind(locsamp, Ysamp)), lambda=0, ini.cov.pars=c(1,0.2), messages=FALSE)
+		Yhat <- exp(lf$beta+lf$tausq/2+lf$sigmasq/2)
+		Yhatvar <- exp(2*lf$beta+lf$tausq+lf$sigmasq)*(exp(lf$tausq+lf$sigmasq)-1)
+		Yres[j,c("Yhat", "Yhatvar"),x[[1]]] <- c(Yhat, Yhatvar)
+		# estimation of abundance-at-age
+		prop <- acomp(Isamp)
+		Ihat[j,,x[[1]],x[[2]]] <- Yhat*c(mean(prop))
+	}
+}
-	## simulando S
+#----------------------------------------------------------------------------------------------
-	S00 <- grf(grid=gs, cov.pars=c(sig00/2, phi00), nsim=niter)
+# STEP 6: estimation with design-based estimators
-	S11 <- grf(grid=gs, cov.pars=c(sig11/2, phi11), nsim=niter)
+#----------------------------------------------------------------------------------------------
-	S22 <- grf(grid=gs, cov.pars=c(sig22/2, phi22), nsim=niter)
-	S33 <- grf(grid=gs, cov.pars=c(sig33/2, phi33), nsim=niter)
-	## simulando mu
+# objects
-	# independent
+Ibar <- array(NA, dim=c(ns,3,nsim,3), dimnames=list(samp=1:ns, page=1:3, nsim=1:nsim, mucor=c("indep", "dep045","dep090")))
-	set.seed(111)
+# estimation
-	Cor <- diag(c(1,1,1))
+set.seed(222)
-	for(i in 1:niter){
+for(j in 1:ns){
-		m.arr[,,i] <- mvrnorm(n, c(mu1, mu2, mu3), Cor*tsq00)
+cat("\n",j ,":", date(),"-", sep="")
+	samp <- sample(1:n, 100)
+	for(i in 1:nrow(xd)){
+		x <- xd[i,]
+		cat(".", sep="")
+		# building the sample
+		Isamp <- Isim.ln[samp,,x[[1]],x[[2]]]
+		Ysamp <- apply(Isamp,1,sum)
+		# store means
+		Yres[j,c("Ybar", "Ybarvar"),x[[1]]] <- c(mean(Ysamp), var(Ysamp))
+		# estimation of abundance-at-age
+		Ibar[j,,x[[1]],x[[2]]] <- apply(Isamp,2,mean)
 	}
-	## construindo Y = S+e
-	arr[,1,,1] <- m.arr[,1,] + S00$data + S11$data
-	arr[,2,,1] <- m.arr[,2,] + S00$data + S22$data
-	arr[,3,,1] <- m.arr[,3,] + S00$data + S33$data
-	# dependent
-	set.seed(111)
-	Cor[2,1] <- -0.9
-	Cor[1,2] <- -0.9
-	for(i in 1:niter){
-		m.arr[,,i] <- mvrnorm(n, c(mu1, mu2, mu3), Cor*tsq00)
-	}
-	## construindo Y = S+e
-	arr[,1,,2] <- m.arr[,1,] + S00$data + S11$data
-	arr[,2,,2] <- m.arr[,2,] + S00$data + S22$data
-	arr[,3,,2] <- m.arr[,3,] + S00$data + S33$data
-	# out
-	arr
 }
-</code>
-===== Resultados =====
+#==============================================================================================
+# RESULTS
+#==============================================================================================
-<code R>
+#----------------------------------------------------------------------------------------------
-Ihat.bias <- apply(Ihat - exp(1.2+log(0.33)+0.7/2), c(1,3,4), mean)
+# STEP 7: summary statistics
-Ihat.mse <- apply(Ihat - exp(1.2+log(0.33)+0.7/2), c(1,3,4), var)
+#----------------------------------------------------------------------------------------------
-Ibar.bias <- apply(Ibar - exp(1.2+log(0.33)+0.7/2), c(1,3,4), mean)
-Ibar.mse <- apply(Ibar - exp(1.2+log(0.33)+0.7/2), c(1,3,4), var)
-> Ihat.bias
+# objects
-, , mucor = indep
+sim.res <- array(NA, dim=c(nsim,3,3,2,3), dimnames=list(iter=1:nsim, age=1:3, mucor=c("indep", "dep045","dep090"), meth=c("geo","samp"), stats=c("bias", "mse", "ICcov")))
+# computation
+for(i in 1:nsim){
+	lmusim <- Isim.lnhat[,i,]
+	# geo
+	m <- Ihat[,,i,]
+	q025 <- apply(m,c(2,3), quantile, probs=0.025, na.rm=T)
+	q975 <- apply(m,c(2,3), quantile, probs=0.975, na.rm=T)
+	sim.res[i,,,"geo","ICcov"] <- q025<=lmusim & q975>=lmusim
+	for(j in 1:ns){
+		m[j,,] <- m[j,,]-lmusim
+	}
+	sim.res[i,,,"geo","bias"] <- apply(m, c(2,3), mean)
+	sim.res[i,,,"geo","mse"] <- apply(m, c(2,3), var)
-   spcor
+	# samp
-age          0       0.25        0.5       0.75         1
+	m <- Ibar[,,i,]
- -0.2310553 -0.2861606 -0.3006835 -0.1239977 0.3697025
+	q025 <- apply(m,c(2,3), quantile, probs=0.025, na.rm=T)
- -0.2123689 -0.2996022 -0.2987973 -0.1059902 0.3663539
+	q975 <- apply(m,c(2,3), quantile, probs=0.975, na.rm=T)
- -0.2199671 -0.2902498 -0.2877976 -0.1249653 0.3581888
+	sim.res[i,,,"samp","ICcov"] <- q025<=lmusim & q975>=lmusim
+	for(j in 1:ns){
-, , mucor = dep
+		m[j,,] <- m[j,,]-lmusim
+	}
-   spcor
+	sim.res[i,,,"samp","bias"] <- apply(m, c(2,3), mean)
-age          0       0.25        0.5        0.75         1
+	sim.res[i,,,"samp","mse"] <- apply(m, c(2,3), var)
- -0.1963665 -0.2576779 -0.2853226 -0.12670738 0.3707011
+}
--0.1859164 -0.2815529 -0.2817175 -0.09679016 0.3764652
+# table
- -0.2268756 -0.3127838 -0.3202839 -0.17392410 0.2859450
+tab.res <-  apply(sim.res, c(2,3,4,5), mean)
-> Ibar.bias
-, , mucor = indep
-   spcor
-age        0     0.25      0.5     0.75        1
-1.347212 3.053397 4.841803 6.413725 10.24748
-1.460882 2.975048 4.877596 6.232822 10.83875
- 1.507800 2.930321 4.902475 6.425564 10.72584
-, , mucor = dep
-   spcor
-age         0     0.25      0.5     0.75        1
-0.8780813 2.174725 3.593623 5.212714 8.202600
-0.8637434 2.198764 3.656170 4.997954 8.087312
-1.2216741 2.651468 4.023795 5.665337 9.817139
+#################################################################################################
+# THE END (or the beginning ...)
+#################################################################################################
 </code>
+===== Results =====

Diferenças

Navegação

Busca

Ferramentas

QR Code