# Copyright 2008 Alexander Weaver Blocker # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # Author: Alexander Weaver Blocker # Description: A set of R utilties to perform batch & online bagging # (bootstrap aggregation) for linear models. Plans include extension # to general linear models. # Primary function for batch bagging lm; also serves to initialize online # bagging algorithm bagginglm <- function(formula, nmodels=30, aggweights=NULL, data, subset, weights, ...) { if (nmodels<1) stop("nmodels must be positive integer") # Setup model frame and calls mf <- match.call(expand.dots = TRUE) m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L) mfr <- mf[c(1L,m)] mf <- mf[m] mfr$drop.unused.levels <- TRUE mfr[[1L]] <- as.name("model.frame") mfr <- eval(mfr, parent.frame()) w <- as.vector(model.weights(mfr)) # Setup parameters for estimation nobs <- nrow(mfr) if (is.null(w)) w <- rep(1, nobs) bootind <- sapply(1:nmodels, function(x) sample(nobs, nobs, replace=TRUE)) # Run models modellist <- apply(bootind, 2, .runmodel, formula=formula, data=mfr, ...) # Obtain and aggregate coefficients allcoef <- sapply(modellist, coef) if (is.null(aggweights)) coefficients <- rowMeans(allcoef) else coefficients <- rowMeans( allcoef %*% diag(aggweights/sum(aggweights)) ) result <- list(modellist=modellist, allcoef=allcoef, coefficients=coefficients) class(result) <- "bagginglm" result } # Internal function to run models .runmodel <- function(ind, formula, data, w, ...) { if ( sum(complete.cases(data[ind,]))==0 ) return( NA ) else return( lm(formula, data=data[ind,], #weights=w[ind], ...) ) }