## Copyright (c) 2014 Takafumi Kanamori [kanamori@is.nagoya-u.ac.jp]
## All rights reserved.
##
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions are met:
## - Redistributions of source code must retain the above copyright notice,
##   this list of conditions and the following disclaimer.
## - Redistributions in binary form must reproduce the above copyright notice,
##   this list of conditions and the following disclaimer in the documentation
##   and/or other materials provided with the distribution.
##
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
## ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
## LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
## CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
## SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
## INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
## POSSIBILITY OF SUCH DAMAGE.

## Kernel-based density-ratio fitting: KuLSIF, proposed in
##   T. Kanamori, T. Suzuki, M. Sugiyama,
##   "Statistical analysis of kernel-based least-squares density-ratio estimation",
##   Machine Learning, vol. 86, issue 3, pp. 335-367, March 2012.

## required library: "kernlab"
## "snow" is available for parallel computing (optional)
library(kernlab)

## KuLSIF(): estimation of the density ratio with a Gaussian kernel
## sigma:  a positive real value or a vector; for a vector, LOOCV is applied automatically.
## lambda: a positive real value or a vector; for a vector, LOOCV is applied automatically.
## data:   de (denominator sample) and nu (numerator sample); de ~ p, nu ~ q
##         (density ratio: r = q/p)
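## Summary of the estimator implemented below (derived from the code, using
## its own variable names): the fitted density ratio has the kernel expansion
##
##     r(x) = sum_i alpha_i k(x, de_i) + (1/(m*lambda)) * sum_j k(x, nu_j),
##
## where k is the Gaussian kernel rbfdot(sigma), de_1,...,de_n is the
## denominator sample, nu_1,...,nu_m is the numerator sample, and alpha
## solves the regularized linear system
##
##     (Kxx + n*lambda*I) %*% alpha = -Kxy %*% rep(1/(m*lambda), m).
##
## KuLSIF.single() computes alpha either directly with solve() or by
## minimizing the equivalent quadratic objective with optim();
## predict.KuLSIF() evaluates r(x) at new points and truncates it at zero.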
KuLSIF <- function(de, nu,
                   ## sigma=sigest(rbind(de,nu))[2],
                   sigma=sigest(nu)[2],                  ## band-width of the Gaussian kernel
                   lambda=1/min(nrow(de),nrow(nu))^0.9,
                   loocv=TRUE,
                   direct=TRUE,
                   cluster=NULL,
                   method="BFGS",
                   control=list(maxit=4000,reltol=10^(-6)),
                   frac=1) {
  if(is.vector(de)) de <- matrix(de,length(de),1)
  if(is.vector(nu)) nu <- matrix(nu,length(nu),1)
  if(ncol(de)!=ncol(nu)) stop("Dimensions of the two data sets do not match!")
  if(frac<1){                       ## optionally subsample the denominator data
    nde <- ceiling(nrow(de)*frac)
    de <- de[sort(sample(1:nrow(de),nde)),,drop=FALSE]
  }
  hparameters <- expand.grid(sigma,lambda)
  dimnames(hparameters)[[2]] <- c("sigma","lambda")
  ## select solver:
  ##   direct=TRUE  -> solve(); direct=FALSE -> optim()
  ##   (optim() is forced for large denominator samples)
  if(nrow(de)>=8000) direct <- FALSE
  dat <- list(de=de,nu=nu)
  if((!loocv) && (length(lambda)==1) && (length(sigma)==1)){
    res <- KuLSIF.single(dat,sigma=sigma,lambda=lambda,direct=direct,method=method,control=control)
    res$lcv <- NULL; res$lcv.min <- NULL
  }else{
    lcv <- LOOCV_KuLSIF(dat,sigma,lambda,cluster=cluster)
    res <- KuLSIF.single(dat,sigma=lcv$opt_sigma,lambda=lcv$opt_lambda,direct=direct,method=method,control=control)
    res$lcv <- lcv$cv; res$lcv.min <- min(lcv$cv)
  }
  res$sigma.list <- sigma; res$lambda.list <- lambda
  res$hyper_para <- hparameters
  class(res) <- 'KuLSIF'
  res
}

## prediction
## res: output of KuLSIF()
## newdat = matrix(, n, dim): data matrix to be evaluated
## output = list(r=, nr=): r = density ratio, nr = normalized density ratio
predict.KuLSIF <- function(res,newdat){
  if(is.vector(newdat)) newdat <- matrix(newdat,length(newdat),1)
  if(ncol(res$dat$de)!=ncol(newdat)) stop("Dimension does not match!")
  sigma <- res$sigma; lambda <- res$lambda
  west_f <- kernelMult(rbfdot(sigma=sigma),newdat,res$dat$de,res$par)
  west_s <- kernelMult(rbfdot(sigma=sigma),newdat,res$dat$nu,
                       rep(1/(nrow(res$dat$nu)*lambda),nrow(res$dat$nu)))
  west <- west_f + west_s
  DRest <- c(pmax(west,0))          ## negative estimates are truncated at zero
  list(r=DRest,nr=DRest/mean(DRest))
}

####################
## misc functions ##
####################

## single fit for a fixed (sigma, lambda) pair
KuLSIF.single <- function(dat,
                          sigma=sigest(rbind(dat$de,dat$nu))[2],
                          lambda=1/min(nrow(dat$de),nrow(dat$nu))^0.9,
                          direct=TRUE,
                          method="BFGS",
                          control=list(maxit=4000,reltol=10^(-6))){
  n <- nrow(dat$de); m <- nrow(dat$nu)
  Kxx <- kernelMatrix(rbfdot(sigma=sigma),dat$de)
  Kxy <- kernelMatrix(rbfdot(sigma=sigma),dat$de,dat$nu)
  if(direct){
    ## closed-form solution of the regularized linear system
    par <- c(solve(Kxx+n*lambda*diag(n),-rowMeans(Kxy)/lambda))
  }else{
    ## numerical minimization of the equivalent quadratic objective
    fn <- function(alpha) sum((Kxx%*%alpha)*alpha)/2 + n*lambda*sum(alpha^2)/2 +
      sum(alpha*rowMeans(Kxy))/lambda
    gr <- function(alpha) c(Kxx%*%alpha + n*lambda*alpha + rowMeans(Kxy)/lambda)
    op <- optim(rep(0,n),fn,gr,method=method,control=control)
    par <- op$par
  }
  list(par=par,sigma=sigma,lambda=lambda,dat=dat)
}

## LOOCV for KuLSIF over the (sigma, lambda) grid; parallel computation is
## available by passing a "snow" cluster, e.g.
##   ncore <- 8
##   cluster <- makeCluster(ncore, type = "SOCK")
##   clusterExport(cluster, c("kernelMatrix","rbfdot"))
LOOCV_KuLSIF <- function(dat,
                         sigma=sigest(rbind(dat$de,dat$nu))[2],
                         lambda.list=1/min(nrow(dat$de),nrow(dat$nu))^0.9,
                         cluster=NULL){
  hparameters <- expand.grid(sigma,lambda.list)
  dimnames(hparameters)[[2]] <- c("sigma","lambda")
  if(is.null(cluster)){
    cvlist <- apply(hparameters,1,LOOCV_KuLSIF.single_lambda,dat)
  }else{
    clusterExport(cluster,c("kernelMatrix","rbfdot"))
    cvlist <- parApply(cluster,hparameters,1,LOOCV_KuLSIF.single_lambda,dat)
  }
  opt_para <- hparameters[which.min(cvlist),]
  list(lambda=lambda.list,sigma=sigma,cv=cvlist,
       opt_sigma=as.double(opt_para[1]),opt_lambda=as.double(opt_para[2]))
}
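## The helper below evaluates the leave-one-out CV score for a single
## (sigma, lambda) pair, as called by LOOCV_KuLSIF() above; smaller scores
## are better (which.min selects the optimum). The following description is
## inferred from the code, not taken from the reference: the pairs
## (de_i, nu_i), i = 1,...,min(n,m), appear to be held out jointly, and the
## held-out fits are recovered from a single inverse
## G = (Kxx + (n-1)*lambda*I)^{-1} and its diagonal (a rank-one-update
## shortcut), so each grid point costs one n x n inverse instead of
## min(n,m) refits.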
## hpara = c(sigma, lambda)
LOOCV_KuLSIF.single_lambda <- function(hpara,dat){
  sigma <- hpara[1]; lambda <- hpara[2]
  n <- nrow(dat$de); m <- nrow(dat$nu)
  E <- matrix(1,m,min(n,m)); diag(E) <- rep(0,min(n,m))
  Kxx <- kernelMatrix(rbfdot(sigma=sigma),dat$de)
  Kxy <- kernelMatrix(rbfdot(sigma=sigma),dat$de,dat$nu)
  Kyy <- kernelMatrix(rbfdot(sigma=sigma),dat$nu)
  ## leave-one-out coefficients, computed from a single matrix inverse
  modKxy <- Kxy[,1:min(n,m)] - rowSums(Kxy)
  G <- solve(Kxx+(n-1)*lambda*diag(1,n))
  S <- modKxy/((m-1)*lambda)
  GS <- G%*%S
  diagT <- (diag(GS)[1:min(n,m)])/(diag(G)[1:min(n,m)])
  A <- GS - t(G[1:min(n,m),]*diagT)
  B <- E/((m-1)*lambda)
  ## leave-one-out ratio estimates at the held-out de points (wx) and nu points (wy)
  wx <- pmax(rowSums((cbind(Kxx,Kxy)[1:min(n,m),])*cbind(t(A),t(B))),0)
  wy <- pmax(rowSums((cbind(t(Kxy),Kyy)[1:min(n,m),])*cbind(t(A),t(B))),0)
  ## leave-one-out estimate of the least-squares criterion (smaller is better)
  (sum(wx^2)/2-sum(wy))/min(n,m)
}
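## --------------------------------------------------------------------------
## Usage sketch. The data, sample sizes, and hyper-parameter grids below are
## arbitrary illustrative choices, not part of the method itself.
##
## set.seed(1)
## de <- matrix(rnorm(500), ncol=1)            ## de ~ p = N(0, 1)
## nu <- matrix(rnorm(300, mean=0.5), ncol=1)  ## nu ~ q = N(0.5, 1)
##
## ## single (sigma, lambda) pair, no LOOCV
## fit <- KuLSIF(de, nu, loocv=FALSE)
## est <- predict.KuLSIF(fit, seq(-3, 3, length.out=50))
## ## est$r: estimated ratio q/p; est$nr: normalized so that mean(nr) = 1
##
## ## grids of hyper-parameters: LOOCV is applied automatically
## fit.cv <- KuLSIF(de, nu, sigma=c(0.5, 1, 2), lambda=c(0.01, 0.1))
##
## ## optional parallel LOOCV with "snow"
## ## library(snow)
## ## cl <- makeCluster(4, type="SOCK")
## ## fit.cv <- KuLSIF(de, nu, sigma=c(0.5, 1, 2), lambda=c(0.01, 0.1), cluster=cl)
## ## stopCluster(cl)
## --------------------------------------------------------------------------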