# Copyright 2008 Alexander Weaver Blocker
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# Author: Alexander Weaver Blocker
# Description: A set of R utilties to perform batch & online bagging
# (bootstrap aggregation) for linear models. Plans include extension
# to general linear models.
# Primary function for batch bagging lm; also serves to initialize online
# bagging algorithm
bagginglm <- function(formula, nmodels=30, aggweights=NULL,
data, subset, weights, ...)
{
if (nmodels<1) stop("nmodels must be positive integer")
# Setup model frame and calls
mf <- match.call(expand.dots = TRUE)
m <- match(c("formula", "data", "subset", "weights", "na.action",
"offset"), names(mf), 0L)
mfr <- mf[c(1L,m)]
mf <- mf[m]
mfr$drop.unused.levels <- TRUE
mfr[[1L]] <- as.name("model.frame")
mfr <- eval(mfr, parent.frame())
w <- as.vector(model.weights(mfr))
# Setup parameters for estimation
nobs <- nrow(mfr)
if (is.null(w))
w <- rep(1, nobs)
bootind <- sapply(1:nmodels, function(x)
sample(nobs, nobs, replace=TRUE))
# Run models
modellist <- apply(bootind, 2, .runmodel, formula=formula,
data=mfr, ...)
# Obtain and aggregate coefficients
allcoef <- sapply(modellist, coef)
if (is.null(aggweights))
coefficients <- rowMeans(allcoef)
else
coefficients <- rowMeans( allcoef %*%
diag(aggweights/sum(aggweights)) )
result <- list(modellist=modellist, allcoef=allcoef,
coefficients=coefficients)
class(result) <- "bagginglm"
result
}
# Internal function to run models
.runmodel <- function(ind, formula, data, w, ...) {
if ( sum(complete.cases(data[ind,]))==0 )
return( NA )
else
return( lm(formula, data=data[ind,], #weights=w[ind],
...) )
}