Diferenças
Aqui você vê as diferenças entre duas revisões dessa página.
Ambos lados da revisão anteriorRevisão anteriorPróxima revisão | Revisão anterior | ||
pessoais:jcfaria [2007/03/01 11:47] – jcfaria | pessoais:jcfaria [2012/08/01 19:25] (atual) – jcfaria | ||
---|---|---|---|
Linha 1: | Linha 1: | ||
- | ====== | + | ====== José Cláudio Faria ====== |
- | {{pessoais: | + | {{ pessoais: |
- | Na Praia do Sul de Ilhéus/Bahia (janeiro de 2007) refletindo profundamente sobre o R!!! | + | Eu na Praia do Sul de Ilhéus/BA, em janeiro de 2007, refletindo profundamente sobre estatística computacional e o R!!! |
+ | |||
+ | Brincadeiras a parte... | ||
+ | |||
+ | **1. Quem sou** | ||
+ | |||
+ | - Engenheiro Agrônomo | ||
+ | - Mestrado e Doutorado em Produção Vegetal pela Universidade Federal de Viçosa - UFV/MG | ||
+ | - Pós-doc em estatística e experimentação agronômica (ESALQ) | ||
+ | |||
+ | |||
+ | **2. O que tenho feito profissionalmente** | ||
+ | |||
+ | - Professor de estatística e pesquisador da Universidade Estadual de Santa Cruz - UESC/BA; | ||
+ | |||
+ | - Tenho estado desenvolvendo algumas soluções computacionais voltadas para o ambiente R: | ||
+ | - Editores: | ||
+ | - Tinn-R ([[http:// | ||
+ | - Vim-R-plugin ([[http:// | ||
+ | - Pacotes: | ||
+ | - bpca ([[http:// | ||
+ | - TinnR ([[http:// | ||
+ | - fdth ([[http:// | ||
+ | - ScottKnott ([[http:// | ||
+ | - TukeyC ([[http:// | ||
+ | |||
+ | |||
+ | **3. Sobre o R** | ||
+ | |||
+ | - Gostaria de tê-lo encontrado desde o início de minha carreira na área de estatística computacional! | ||
+ | |||
+ | |||
+ | **4. Sobre o futuro** | ||
+ | |||
+ | - Desejo aprofundar os conhecimentos em análise multivariada de dados no ambiente R; | ||
+ | |||
+ | - Trocar experiências com pessoas e equipes envolvidas nestas áreas. | ||
===== Tinn-R ===== | ===== Tinn-R ===== | ||
Linha 563: | Linha 599: | ||
==== Funções úteis ==== | ==== Funções úteis ==== | ||
- | === Tabelas e histogramas | + | === Superfície de resposta |
- | == Função | + | == Função |
+ | The simple, power and very flexible function **plotlm3d** enables you to plot 3d points and/or surfaces obtained from linear methods. It was adapted from scatter3d [[http:// | ||
- | Função simples, flexível mas poderosa para descrever, via tabela de distribuição de freqüências e histogramas, | + | It requires the **rgl** package that you can download from [[http:// |
< | < | ||
# | # | ||
- | # Name : | + | # Name : |
- | # Original author: | + | # Original author: |
- | # Date (dd/ | + | # Changes |
- | # Version | + | # Date (dd/ |
- | # Aim : To make tables of frequency distribution and associated | + | # Version |
- | # histogram | + | # Aim : To plot 3d scatter, an or, surfaces with rgl package |
# | # | ||
+ | |||
# Arguments: | # Arguments: | ||
- | # breaks | + | # x |
- | # by | + | # y |
- | # end : Last class (high value) | + | # z |
- | # h : Classes extent | + | # surface |
- | # k : Class number | + | # model |
- | # right : Intervals right open (default = FALSE) | + | # groups |
- | # start : First class (small value) | + | # a different surface or set of surfaces is plotted for each |
- | # x : A R object | + | # level of the factor; in this event, the colours in plane.col |
- | # histogram | + | # are used successively for the points and surfaces. |
- | # title.histogram: Title of histogram c(' | + | # model.by.group |
- | # | + | # of groups; the order of the models must be the same of the |
- | + | # level of the. | |
- | # Common functions | + | # model.summary |
- | tb.make.table.I <- function(x, | + | # simple.axes |
- | start, | + | # box |
- | end, | + | # xlab, |
- | h, | + | # ylab, |
- | right, | + | # zlab axis labels. |
- | | + | # surface.col |
- | | + | # |
+ | # point.col | ||
+ | # grid.col | ||
+ | # grid plot grid lines on the regression surface(s) | ||
+ | # grid.lines number | ||
+ | # the x and z directions. | ||
+ | # sphere.factor | ||
+ | # | ||
+ | # threshold | ||
+ | # | ||
+ | # speed | ||
+ | # revolutions | ||
+ | |||
+ | plotlm3d <- function (x, y, z, | ||
+ | surface | ||
+ | model | ||
+ | groups | ||
+ | model.by.group | ||
+ | model.summary | ||
+ | simple.axes | ||
+ | box | ||
+ | xlab = deparse(substitute(x)), | ||
+ | ylab = deparse(substitute(y)), | ||
+ | zlab = deparse(substitute(z)), | ||
+ | surface.col | ||
+ | ' | ||
+ | | ||
+ | grid.col = material3d(" | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
{ | { | ||
- | | + | |
- | | + | |
- | | + | |
- | | + | |
- | facP <- 100*(cumsum(f/length(x))) # Cumulative freq, % | + | |
- | | + | |
- | | + | stop('Model number is different of the number of groups' |
- | frP <- round(as.numeric(frP), 2) | + | |
- | fac <- round(as.numeric(fac), 2) | + | |
- | | + | stop(' |
- | res <- data.frame(fi, fr, frP, fac, facP) # Make final table | + | |
- | | + | |
+ | stop('groups variable must be a factor.') | ||
+ | |||
+ | xlab; ylab; zlab | ||
- | | + | |
- | | + | |
- | | + | |
- | breaks = seq(start, end, h), | + | complete.cases(x, y, z, groups) |
- | freq = T, | + | |
- | right = right, | + | |
- | xlab = 'Class limits', | + | y <- y[valid] |
- | col = ' | + | z <- z[valid] |
- | main = titleH, | + | |
- | xlim = c(start, end), ylim=c(0, max(fi)), | + | if (!is.null(groups)) |
- | las | + | |
- | | + | |
- | | + | levs <- levels(groups) |
+ | size <- max(c(x,y,z))/100 * sphere.factor | ||
+ | |||
+ | if (is.null(groups)) { | ||
+ | | ||
+ | spheres3d(x, | ||
+ | | ||
+ | points3d(x, y, z, color = point.col) | ||
} | } | ||
- | return(res) | ||
- | } | ||
- | |||
- | tb.make.table.II <- function (x, | ||
- | k, | ||
- | breaks=c(' | ||
- | right=FALSE, | ||
- | histogram, | ||
- | titleH) | ||
- | { | ||
- | x <- na.omit(x) | ||
- | |||
- | # User defines only x and/or ' | ||
- | # (x, {k, | ||
- | if (missing(k)) { | ||
- | brk <- match.arg(breaks) | ||
- | switch(brk, | ||
- | | ||
- | | ||
- | | ||
- | tmp <- range(x) | ||
- | start <- tmp[1] - abs(tmp[2])/ | ||
- | end <- tmp[2] + abs(tmp[2])/ | ||
- | R <- end-start | ||
- | h <- R/k | ||
- | } | ||
- | |||
- | # User defines ' | ||
- | # (x, k,[breaks, right]) | ||
else { | else { | ||
- | | + | |
- | start <- tmp[1] - abs(tmp[2])/100 | + | |
- | | + | |
- | R <- end-start | + | points3d(x, y, z, color = surface.col[as.numeric(groups)]) |
- | h <- R/abs(k) | + | |
} | } | ||
- | tbl <- tb.make.table.I(x, | ||
- | return(tbl) | ||
- | } | ||
- | # With Gabor Grotendieck suggestions | + | aspect3d(c(1, 1, 1)) |
- | tb.table <- function(x, ...) UseMethod(" | + | |
- | # Table form vectors | + | if (surface) { |
- | tb.table.default | + | |
- | k, | + | yvals <- seq(min(y), max(y), length |
- | | + | |
- | | + | |
- | h, | + | |
- | | + | for (i in 1: |
- | right=FALSE, | + | |
- | histogram=TRUE, | + | |
- | title.histogram=c(' | + | |
- | { | + | if (model.summary) |
- | # User defines nothing or not ' | + | summaries[[model[i]]] <- summary(mod) |
- | stopifnot(is.numeric(x)) | + | |
- | | + | |
- | # User defines only ' | + | zhat <- matrix(predict(mod, newdata |
- | # (x, {k, start, end, h}, [breaks, right]) | + | |
- | if (missing(k) && missing(start) && missing(end) && missing(h) ) { | + | |
- | brk <- match.arg(breaks) | + | |
- | switch(brk, | + | |
- | | + | |
- | | + | |
- | | + | |
- | | + | |
- | start <- tmp[1] - abs(tmp[2])/ | + | |
- | end <- tmp[2] + abs(tmp[2])/ | + | |
- | R <- end-start | + | |
- | h <- R/k | + | |
- | } | + | |
- | + | ||
- | # User defines ' | + | |
- | # (x, k, {start, end, h}, [breaks, right]) | + | |
- | else if (missing(start) && missing(end) && missing(h)) { | + | |
- | stopifnot(length(k) >= 1) | + | |
- | tmp <- range(x) | + | |
- | start <- tmp[1] - abs(tmp[2])/ | + | |
- | end <- tmp[2] + abs(tmp[2])/ | + | |
- | R <- end-start | + | |
- | h <- R/abs(k) | + | |
- | } | + | |
- | + | ||
- | # User defines ' | + | |
- | # (x, {k,} start, end, {h,} [breaks, right]) | + | |
- | else if (missing(k) && missing(h)) { | + | |
- | stopifnot(length(start) >= 1, length(end) >=1) | + | |
- | tmp <- range(x) | + | |
- | R <- end-start | + | |
- | k <- sqrt(abs(R)) | + | |
- | if (k < 5) k <- 5 # min value of k | + | |
- | h <- R/k | + | |
- | } | + | |
- | + | ||
- | # User defines ' | + | |
- | # (x, {k,} start, end, h, [breaks, right]) | + | |
- | else if (missing(k)) { | + | |
- | stopifnot(length(start) >= 1, length(end) >= 1, length(h) >= 1) | + | |
- | } | + | |
- | + | ||
- | else stop(' | + | |
- | + | ||
- | if (histogram) { | + | |
- | x11() | + | |
- | par(mfrow=c(1, | + | |
- | title.histogram <- match.arg(title.histogram) | + | |
- | switch(title.histogram, | + | |
- | auto = titleH <- ' | + | |
- | none = titleH <- '' | + | |
- | } | + | |
- | tbl <- tb.make.table.I(x, | + | |
- | return(tbl) | + | |
- | } | + | |
- | + | ||
- | # Table form data.frames | + | |
- | tb.table.data.frame <- function(df, | + | |
- | k, | + | |
- | by, | + | |
- | breaks=c(' | + | |
- | right=FALSE, | + | |
- | histogram=TRUE, | + | |
- | title.histogram=c(' | + | |
- | { | + | |
- | stopifnot(is.data.frame(df)) | + | |
- | tmpList <- list() | + | |
- | nameF <- character() | + | |
- | nameY <- character() | + | |
- | # User didn't defines a factor | + | |
- | | + | |
- | | + | |
- | nHist <- length(logCol[logCol]) | + | |
- | if (histogram) { | + | |
- | count = 0 | + | |
- | if (nHist > 1) { | + | |
- | x11() | + | |
- | par(mfrow=c(4, 1)) | + | |
} | } | ||
- | } | + | else { # groups is not NULL |
- | for (i in 1:ncol(df)) { | + | if (!model.by.group) { |
- | if (logCol[i]) { | + | |
- | | + | mod <- lm(formula(model[i]), |
- | if (count == 5) { | + | |
- | | + | |
- | | + | summaries[[paste(model[i], ' |
- | | + | |
+ | zhat <- matrix(predict(mod, | ||
+ | | ||
+ | |||
+ | | ||
+ | | ||
+ | lit = F, front = ' | ||
+ | |||
+ | texts3d(min(x), | ||
+ | groups = levs[j])), paste(levs[j], | ||
+ | } | ||
+ | } | ||
+ | else { # model.by.group is TRUE | ||
+ | | ||
+ | |||
+ | | ||
+ | summaries[[paste(model[i], ' | ||
+ | |||
+ | | ||
+ | |||
+ | surface3d(xvals, | ||
+ | |||
+ | if (grid) | ||
+ | surface3d(xvals, | ||
+ | lit = F, front = ' | ||
+ | |||
+ | texts3d(min(x), | ||
+ | groups = levs[i])), paste(levs[i], | ||
} | } | ||
- | title.histogram <- match.arg(title.histogram) | ||
- | switch(title.histogram, | ||
- | auto = titleH <- names(logCol[i]), | ||
- | none = titleH <- '' | ||
- | x <- as.matrix(df[ ,i]) | ||
- | tbl <- tb.make.table.II(x, | ||
- | tmpList <- c(tmpList, list(tbl)) | ||
} | } | ||
} | } | ||
- | valCol <- logCol[logCol] | ||
- | names(tmpList) <- names(valCol) | ||
- | return(tmpList) | ||
} | } | ||
+ | if(simple.axes) { | ||
+ | axes3d(c(' | ||
+ | title3d(xlab = xlab, ylab = ylab, zlab = zlab) | ||
+ | } | ||
+ | else | ||
+ | decorate3d(xlab = xlab, ylab = ylab, zlab = zlab, box = box) | ||
- | | + | |
- | else { | + | |
- | namesdf | + | |
- | | + | |
- | stopifnot(is.factor((df[[pos]]))) | + | rgl.viewpoint(userMatrix |
- | nF <- table(df[[pos]]) | + | |
- | | + | |
- | | + | |
- | nDisGraph | + | |
- | if (histogram) { | + | |
- | count <- 0 | + | |
- | x11() | + | |
- | par(mfrow=c(4, | + | |
- | } | + | |
- | for(i in 1: | + | |
- | tmpdf <- subset(df, df[[pos]] == names(nF[i])) | + | |
- | logCol <- sapply(tmpdf, | + | |
- | for (j in 1: | + | |
- | if (logCol[j]) { | + | |
- | count | + | |
- | if (count == 13) { | + | |
- | | + | |
- | par(mfrow=c(4, | + | |
- | count <- 1 | + | |
- | } | + | |
- | nameF <- names(nF[i]) | + | |
- | nameY <- names(logCol[j]) | + | |
- | nameFY <- paste(nameF,' | + | |
- | title.histogram <- match.arg(title.histogram) | + | |
- | switch(title.histogram, | + | |
- | | + | |
- | none = titleH <- '' | + | |
- | x <- as.matrix(tmpdf[ | + | |
- | tbl <- tb.make.table.II(x, k, breaks, right, histogram, titleH) | + | |
- | newFY <- list(tbl) | + | |
- | names(newFY) <- sub(' +$', '', | + | |
- | tmpList | + | |
- | } | + | |
- | } | + | |
} | } | ||
} | } | ||
- | return(tmpList) | + | |
+ | | ||
+ | else | ||
+ | return(invisible(NULL)) | ||
} | } | ||
</ | </ | ||
- | == Testar | + | == Usando a função |
- | O script abaixo possibilita testar e aprender a usar a função tb.table. | + | |
< | < | ||
# | # | ||
- | # Name : | + | # Name : |
- | # Original author: Jose Cláudio | + | # Author |
- | # Date (dd/ | + | # Date (dd/ |
- | # Version | + | # Version |
- | # Aim : To learn how to use the function tb.table | + | # Aim : To plot 3d scatter, an or, surfaces with rgl package |
# | # | ||
- | # Observation | ||
- | # | ||
- | # 1.Tables | ||
- | # 1.1. Tables from vectors | ||
- | # | ||
- | |||
- | ## To debug | ||
- | # mtrace.off() | ||
- | # mtrace(tb.make.table.I) | ||
- | # mtrace(tb.make.table.II) | ||
- | # mtrace(tb.table.default) | ||
- | # mtrace(tb.table.data.frame) | ||
- | |||
- | # Make a vector | ||
- | set.seed(1) | ||
- | x=rnorm(150, | ||
- | |||
- | tb.table(x, his=F) | ||
- | tb.table(x) | ||
- | tb.table(x, title.his=' | ||
- | tb.table(x, k=10, his=T) | ||
- | |||
- | #Title | ||
- | tb.table(x, title.his=' | ||
- | tb.table(x, title.his=' | ||
- | tb.table(x, title.his=' | ||
- | |||
- | # Equal to above | ||
- | tb.table(x, breaks=' | ||
- | |||
- | # Equal to above | ||
- | tb.table(x, breaks=' | ||
- | |||
- | tb.table(x, breaks=' | ||
- | |||
- | # Equal to above | ||
- | tb.table(x, b=' | ||
- | |||
- | tb.table(x, breaks=' | ||
- | |||
- | # Equal to above | ||
- | tb.table(x, breaks=' | ||
- | |||
- | tb.table(x, breaks=' | ||
- | |||
- | # Will make a error! | ||
- | tb.table(x, breaks=' | ||
- | |||
- | tb.table(x, k=4) | ||
- | |||
- | tb.table(x, k=20) | ||
- | |||
- | # Partial | ||
- | tb.table(x, start=4, end=6) # Will make error! | ||
- | tb.table(x, start=4, end=6, his=F) | ||
- | |||
- | # Equal to above | ||
- | tb.table(x, s=4, e=6, his=F) | ||
- | |||
- | # Partial | ||
- | tb.table(x, start=4.5, end=5.5, his=F) | ||
- | |||
- | # Partial | ||
- | tb.table(x, start=5, end=6, h=.5, his=F) | ||
- | |||
- | # Nonsense | ||
- | tb.table(x, start=0, end=10, h=.5) | ||
- | |||
- | # First and last class forced (fi=0) | ||
- | tb.table(x, start=1, end=9, h=1) | ||
- | |||
- | tb.table(x, start=1, end=10, h=2) | ||
- | |||
- | |||
- | # | ||
- | # 1.2. Tables from data.frames | ||
- | # | ||
- | # Make a data.frame | ||
- | mdf=data.frame(X1 =rep(LETTERS[1: | ||
- | X2 =as.factor(rep(1: | ||
- | Y1 =c(NA, NA, rnorm(96, 10, 1), NA, NA), | ||
- | Y2 =rnorm(100, 60, 4), | ||
- | Y3 =rnorm(100, 50, 4), | ||
- | Y4 =rnorm(100, 40, 4)) | ||
- | |||
- | tb.table(mdf) | ||
- | |||
- | tb.table(mdf, | ||
- | |||
- | # Equal to above | ||
- | tb.table(mdf, | ||
- | |||
- | # Equal to above | ||
- | tb.table(mdf, | ||
- | |||
- | tb.table(mdf, | ||
- | |||
- | tb.table(mdf, | ||
- | |||
- | tb.table(mdf, | ||
- | |||
- | tb.table(mdf, | ||
- | |||
- | levels(mdf$X1) | ||
- | tbl = tb.table(mdf, | ||
- | length(tbl) | ||
- | names(tbl) | ||
- | tbl | ||
- | |||
- | tb.table(mdf, | ||
- | |||
- | # A ' | ||
- | tb.table(mdf, | ||
- | tb.table(mdf, breaks=' | + | # mtrace(plotlm3d) |
+ | # mtrace.off | ||
- | tb.table(iris, | + | # Example 1 |
+ | open3d() | ||
+ | rgl.bringtotop(stay = T) | ||
+ | with(iris, | ||
+ | surface | ||
+ | groups | ||
+ | xlab = ' | ||
+ | ylab = ' | ||
+ | zlab = ' | ||
+ | grid = F, | ||
+ | sphere.factor | ||
- | tb.table(iris, | + | # Example 2 |
+ | open3d() | ||
+ | rgl.bringtotop(stay = T) | ||
+ | with(iris, | ||
+ | model = c('z ~ x + y', | ||
+ | 'z ~ x + y + I(x^2) + I(y^2) + I(x*y)' | ||
+ | surface | ||
+ | groups | ||
+ | simple.axes | ||
+ | box = T, | ||
+ | xlab = ' | ||
+ | ylab = ' | ||
+ | zlab = ' | ||
+ | grid = F, | ||
+ | sphere.factor | ||
- | levels(iris$Species) | + | # Example 3 |
- | tbl=tb.table(iris, | + | open3d() |
- | length(tbl) | + | rgl.bringtotop(stay = T) |
- | names(tbl) | + | with(iris, |
- | tbl | + | model = c('z ~ x + y', |
+ | ' | ||
+ | | ||
+ | xlab = ' | ||
+ | ylab = ' | ||
+ | zlab = ' | ||
+ | grid = F, | ||
+ | sphere.factor = 1)) | ||
- | tb.table(iris, | + | # Example 4 |
+ | | ||
+ | rgl.bringtotop(stay = T) | ||
+ | with(iris, | ||
+ | | ||
+ | 'z ~ x + y + I(x^2) + I(y^2) + I(x*y)', | ||
+ | 'z ~ I(x^3) + I(y^3)' | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
- | tb.table(iris, breaks=' | + | # Example 5: Netter |
+ | x = c( 274, | ||
+ | | ||
+ | y = c(2450, 3254, 3802, 2838, 2347, 3782, 3008, 2450, 2137, 2560, | ||
+ | 4020, 4427, 2660, 2088, 2605) | ||
+ | z = c( 162, 120, 223, 131, | ||
+ | | ||
- | library(MASS) | + | mreg = lm(z ~ x + y) |
- | levels(Cars93$Origin) | + | ndata = data.frame(x = c(150, 274, 220, 370), y = c(4000, 2800, 3500, 3100)) |
- | tbl=tb.table(Cars93, k=5, by=' | + | zpred = predict(mreg, newdata |
- | names(tbl) | + | |
- | tbl | + | |
- | tb.table(Cars93, breaks='FD', | + | open3d() |
+ | rgl.bringtotop(stay = T) | ||
+ | plotlm3d(x, y, z, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | spheres3d(x = c(150, 274, 220, 370), y = c(4000, 2800, 3500, 3100), zpred, | ||
+ | col = ' | ||
</ | </ | ||