# Copyright 2008 Alexander Weaver Blocker
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# 
# Author: Alexander Weaver Blocker
# Description: A set of R utilties to perform batch & online bagging
# 	(bootstrap aggregation) for linear models. Plans include extension
# 	to general linear models.

# Primary function for batch bagging lm; also serves to initialize online
# 	bagging algorithm
bagginglm <- function(formula, nmodels=30, aggweights=NULL,
	data, subset, weights, ...)
{
	if (nmodels<1) stop("nmodels must be positive integer")
	# Setup model frame and calls
	mf <- match.call(expand.dots = TRUE)
	m <- match(c("formula", "data", "subset", "weights", "na.action", 
		"offset"), names(mf), 0L)
	mfr <- mf[c(1L,m)]
	mf <- mf[m]
	mfr$drop.unused.levels <- TRUE
	mfr[[1L]] <- as.name("model.frame")
	mfr <- eval(mfr, parent.frame())
	w <- as.vector(model.weights(mfr))
	
	# Setup parameters for estimation
	nobs <- nrow(mfr)
	if (is.null(w))
		w <- rep(1, nobs)
	bootind <- sapply(1:nmodels, function(x)
		sample(nobs, nobs, replace=TRUE))
	
	# Run models
	modellist <- apply(bootind, 2, .runmodel, formula=formula,
		data=mfr, ...)
	
	# Obtain and aggregate coefficients
	allcoef <- sapply(modellist, coef)
	if (is.null(aggweights))
		coefficients <- rowMeans(allcoef)
	else
		coefficients <- rowMeans( allcoef %*%
			diag(aggweights/sum(aggweights)) )
			
	result <- list(modellist=modellist, allcoef=allcoef,
		coefficients=coefficients)
	class(result) <- "bagginglm"
	result
}

# Internal function to run models
.runmodel <- function(ind, formula, data, w, ...) {
	if ( sum(complete.cases(data[ind,]))==0 )
		return( NA )
	else
		return( lm(formula, data=data[ind,], #weights=w[ind],
			...) )
}