From 5b32e6ef110184b6854df9ae5e4c7b800106da61 Mon Sep 17 00:00:00 2001 From: mb706 Date: Sat, 5 Mar 2016 23:10:04 +0100 Subject: [PATCH 1/4] Fix #201: focussearch now handles discrete vector parameters also general focussearch bugfixes --- R/infillOptFocus.R | 77 +++++++++++++++++++------- tests/testthat/test_infill_opt_focus.R | 5 ++ 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/R/infillOptFocus.R b/R/infillOptFocus.R index e66485f18..abaf0f6e9 100644 --- a/R/infillOptFocus.R +++ b/R/infillOptFocus.R @@ -13,6 +13,14 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig for (restart.iter in seq_len(control$infill.opt.restarts)) { # copy parset so we can shrink it ps.local = par.set + + # handle discrete vectors: + # The problem is that for discrete vectors, we can't adjust the values dimension-wise. + # Therefore, for discrete vectors, we always drop the last level and instead have a + # mapping that maps, for each discrete vector param and for each dimension, from + # the sampled value (levels 1 to n - #(dropped levels)) to levels with random dropouts. + discreteVectorMapping = lapply(filterParams(par.set, type = c("discretevector", "logicalvector"))$pars, + function(param) rep(list(setNames(names(param$values), names(param$values))), param$len)) # do iterations where we focus the region-of-interest around the current best point for (local.iter in seq_len(control$infill.opt.focussearch.maxit)) { @@ -21,13 +29,22 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig # convert to param encoding our model was trained on and can use newdesign = convertDataFrameCols(newdesign, ints.as.num = TRUE, logicals.as.factor = TRUE) - y = infill.crit(newdesign, models, control, ps.local, design, iter, ...) + + # handle discrete vectors + for (dvparam in filterParams(par.set, type = c("discretevector", "logicalvector"))$pars) { + for (dimnum in seq_len(dvparam$len)) { + dfindex = paste0(dvparam$id, dimnum) + mapping = discreteVectorMapping[[dvparam$id]][[dimnum]] + levels(newdesign[[dfindex]]) = mapping[levels(newdesign[[dfindex]])] + } + } + y = infill.crit(newdesign, models, control, par.set, design, iter, ...) # get current best value local.index = getMinIndex(y, ties.method = "random") local.y = y[local.index] local.x.df = newdesign[local.index, , drop = FALSE] - local.x.list = dfRowToList(recodeTypes(local.x.df, ps.local), ps.local, 1) + local.x.list = dfRowToList(recodeTypes(local.x.df, par.set), par.set, 1) # if we found a new best value, store it if (local.y < global.y) { @@ -39,24 +56,46 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig ps.local$pars = lapply(ps.local$pars, function(par) { # only shrink when there is a value val = local.x.list[[par$id]] - if (!isScalarNA(val)) { - if (isNumeric(par)) { - # shrink to range / 2, centered at val - range = par$upper - par$lower - par$lower = pmax(par$lower, val - (range / 4)) - par$upper = pmin(par$upper, val + (range / 4)) - if (isInteger(par)) { - par$lower = floor(par$lower) - par$upper = ceiling(par$upper) + if (isScalarNA(val)) { + return(par) + } + if (isNumeric(par)) { + # shrink to range / 2, centered at val + range = par$upper - par$lower + par$lower = pmax(par$lower, val - (range / 4)) + par$upper = pmin(par$upper, val + (range / 4)) + if (isInteger(par)) { + par$lower = floor(par$lower) + par$upper = ceiling(par$upper) + } + } else if (isDiscrete(par)) { + # randomly drop a level, which is not val + if (length(par$values) <= 1L) { + return(par) + } + # need to do some magic to handle discrete vectors + if (par$type %nin% c("discretevector", "logicalvector")) { + val.names = names(par$values) + # remove current val from delete options, should work also for NA + val.names = val.names[!sapply(par$values, identical, y=val)] # remember, 'val' may not even be a character + to.del = sample(val.names, 1) + par$values[to.del] = NULL + } else { + # we remove the last element of par$values and a random element for + # each dimension in discreteVectorMapping. + par$values = par$values[-length(par$values)] + if (par$type != "logicalvector") { + # for discretevectorparam val would be a list; convert to character vector + val = names(val) } - } else if (isDiscrete(par)) { - # randomly drop a level, which is not val - if (length(par$values) > 1L) { - val.names = names(par$values) - # remove current val from delete options, should work also for NA - val.names = setdiff(val.names, val) - to.del = sample(seq_along(val.names), 1) - par$values = par$values[-to.del] + for (dimnum in seq_len(par$len)) { + val.names = discreteVectorMapping[[par$id]][[dimnum]] + newmap = val.names + val.names = val.names[val.names != val[dimnum]] + to.del = sample(val.names, 1) + newmap = newmap[newmap != to.del] + names(newmap) = names(par$values) + discreteVectorMapping[[par$id]][[dimnum]] <<- newmap } } } diff --git a/tests/testthat/test_infill_opt_focus.R b/tests/testthat/test_infill_opt_focus.R index 3f9ea5483..b0d5d6990 100644 --- a/tests/testthat/test_infill_opt_focus.R +++ b/tests/testthat/test_infill_opt_focus.R @@ -68,6 +68,9 @@ test_that("complex param space, dependencies, focusing, restarts", { if(x$disc2 == 'a') tmp3 = log(x$realA) + x$intA^4 + ifelse(x$discA == 'm', 5, 0) if(x$disc2 == 'b') tmp3 = exp(x$realB) + ifelse(x$discB == 'R', sin(x$realBR), sin(x$realBNR)) if(x$disc2 == "c") tmp3 = 500 + assert(is.list(x$discVec)) + assert(x$discVec[[1]] %in% c("a", "b", "c")) + assert(x$discScal %in% c("x", "y", "z")) tmp1 + tmp2 + tmp3 }, par.set = makeParamSet( @@ -75,6 +78,8 @@ test_that("complex param space, dependencies, focusing, restarts", { makeIntegerParam("int1", lower = -100, upper = 100), makeNumericVectorParam("realVec", len = 10, lower = -50, upper = 50), makeIntegerVectorParam("intVec", len = 3, lower = 0, upper = 100), + makeDiscreteVectorParam("discVec", len = 3, c(x = "a", y = "b", z = "c")), + makeDiscreteParam("discScal", c(a = "x", b = "y", c = "z")), makeNumericParam("real2", lower = -1, upper = 1), makeDiscreteParam("disc1", values = c("foo", "bar"), requires = quote(real2 < 0)), makeNumericParam("real3", lower = -100, upper = 100, requires = quote(real2 > 0)), From d451c633a30dc2c7f257ed222142f3c3563e5d5f Mon Sep 17 00:00:00 2001 From: mb706 Date: Mon, 7 Mar 2016 21:01:50 +0100 Subject: [PATCH 2/4] More concise handling of discrete vector params in infillOptFocus Parameters that have discrete vector params in their requirement can not be handled by infillOptFocus, so trying to do that now gives an error. --- R/infillOptFocus.R | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/R/infillOptFocus.R b/R/infillOptFocus.R index abaf0f6e9..483fc2890 100644 --- a/R/infillOptFocus.R +++ b/R/infillOptFocus.R @@ -8,8 +8,16 @@ # See infillOptCMAES.R for interface explanation. infillOptFocus = function(infill.crit, models, control, par.set, opt.path, design, iter, ...) { global.y = Inf + + allRequirements = extractSubList(par.set$pars, "requires", simplify = FALSE) + allUsedVars = unique(do.call(base::c, sapply(allRequirements, all.vars))) + forbiddenVars = getParamIds(filterParams(par.set, type = c("discretevector", "logicalvector"))) + if (any(allUsedVars%in% forbiddenVars)) { + stop("Cannot do focus search when some variables have requirements that depend on discrete or logical vector parameters.") + } + - # restart the whole crap some times + # perform multiple starts for (restart.iter in seq_len(control$infill.opt.restarts)) { # copy parset so we can shrink it ps.local = par.set @@ -21,6 +29,14 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig # the sampled value (levels 1 to n - #(dropped levels)) to levels with random dropouts. discreteVectorMapping = lapply(filterParams(par.set, type = c("discretevector", "logicalvector"))$pars, function(param) rep(list(setNames(names(param$values), names(param$values))), param$len)) + discreteVectorMapping = do.call(base::c, discreteVectorMapping) + # the resulting object is NULL if there are no discrete / logical vector params + + if (!is.null(discreteVectorMapping)) { + mappedPars = filterParams(par.set, type = c("discretevector", "logicalvector")) + names(discreteVectorMapping) = getParamIds(mappedPars, with.nr = TRUE, repeated = TRUE) + } + # do iterations where we focus the region-of-interest around the current best point for (local.iter in seq_len(control$infill.opt.focussearch.maxit)) { @@ -31,13 +47,11 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig newdesign = convertDataFrameCols(newdesign, ints.as.num = TRUE, logicals.as.factor = TRUE) # handle discrete vectors - for (dvparam in filterParams(par.set, type = c("discretevector", "logicalvector"))$pars) { - for (dimnum in seq_len(dvparam$len)) { - dfindex = paste0(dvparam$id, dimnum) - mapping = discreteVectorMapping[[dvparam$id]][[dimnum]] - levels(newdesign[[dfindex]]) = mapping[levels(newdesign[[dfindex]])] - } + for (dfindex in names(discreteVectorMapping)) { + mapping = discreteVectorMapping[[dfindex]] + levels(newdesign[[dfindex]]) = mapping[levels(newdesign[[dfindex]])] } + y = infill.crit(newdesign, models, control, par.set, design, iter, ...) # get current best value @@ -77,9 +91,9 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig if (par$type %nin% c("discretevector", "logicalvector")) { val.names = names(par$values) # remove current val from delete options, should work also for NA - val.names = val.names[!sapply(par$values, identical, y=val)] # remember, 'val' may not even be a character + val.names = val.names[!sapply(par$values, identical, y=val)] # remember, 'val' can be any type to.del = sample(val.names, 1) - par$values[to.del] = NULL + par$values[[to.del]] = NULL } else { # we remove the last element of par$values and a random element for # each dimension in discreteVectorMapping. @@ -89,13 +103,13 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig val = names(val) } for (dimnum in seq_len(par$len)) { - val.names = discreteVectorMapping[[par$id]][[dimnum]] - newmap = val.names + dfindex = paste0(par$id, dimnum) + newmap = val.names = discreteVectorMapping[[dfindex]] val.names = val.names[val.names != val[dimnum]] to.del = sample(val.names, 1) newmap = newmap[newmap != to.del] names(newmap) = names(par$values) - discreteVectorMapping[[par$id]][[dimnum]] <<- newmap + discreteVectorMapping[[dfindex]] <<- newmap } } } From ecdeca83e72d0126fa1cd2e0544f8f8f6beedbd0 Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 8 Mar 2016 13:36:13 +0100 Subject: [PATCH 3/4] Cleaning up code. --- R/infillOptFocus.R | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/R/infillOptFocus.R b/R/infillOptFocus.R index 483fc2890..d27cd76b2 100644 --- a/R/infillOptFocus.R +++ b/R/infillOptFocus.R @@ -9,10 +9,12 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, design, iter, ...) { global.y = Inf + discreteVectorPars = filterParams(par.set, type = c("discretevector", "logicalvector")) + allRequirements = extractSubList(par.set$pars, "requires", simplify = FALSE) - allUsedVars = unique(do.call(base::c, sapply(allRequirements, all.vars))) - forbiddenVars = getParamIds(filterParams(par.set, type = c("discretevector", "logicalvector"))) - if (any(allUsedVars%in% forbiddenVars)) { + allRequirementVars = unique(unlist(lapply(allRequirements, all.vars))) + forbiddenRequirementVars = getParamIds(discreteVectorPars) + if (any(allRequirementVars %in% forbiddenRequirementVars)) { stop("Cannot do focus search when some variables have requirements that depend on discrete or logical vector parameters.") } @@ -22,19 +24,23 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig # copy parset so we can shrink it ps.local = par.set - # handle discrete vectors: - # The problem is that for discrete vectors, we can't adjust the values dimension-wise. - # Therefore, for discrete vectors, we always drop the last level and instead have a - # mapping that maps, for each discrete vector param and for each dimension, from - # the sampled value (levels 1 to n - #(dropped levels)) to levels with random dropouts. - discreteVectorMapping = lapply(filterParams(par.set, type = c("discretevector", "logicalvector"))$pars, + # Handle discrete vectors (and logical vectors): + # The problem is that for discrete vectors, we can't adjust the range dimension-wise. + # Instead we store the range of each discrete vectorparameter dimension in the list of named characters + # `discreteVectorMapping`. In each iteration a random value (that does not contain + # the optimum) is dropped from each vector on this list. The $values of the parameters in the parameterset also + # need to be modified to reflect the reduced range: from them, always the last value is dropped. + # Then `discreteVectorMapping` is a mapping that maps, for each discrete vector param dimension + # with originally n values, from the sampled value (levels 1 to n - #(dropped levels)) to the acutal levels with + # random dropouts. + # + # Since the requirements of the param set are queried while generating the design, this breaks if + # there are requirements depending on discrete vector parameters. + discreteVectorMapping = lapply(discreteVectorPars$pars, function(param) rep(list(setNames(names(param$values), names(param$values))), param$len)) - discreteVectorMapping = do.call(base::c, discreteVectorMapping) - # the resulting object is NULL if there are no discrete / logical vector params - - if (!is.null(discreteVectorMapping)) { - mappedPars = filterParams(par.set, type = c("discretevector", "logicalvector")) - names(discreteVectorMapping) = getParamIds(mappedPars, with.nr = TRUE, repeated = TRUE) + discreteVectorMapping = unlist(discreteVectorMapping, recursive=FALSE) + if (!isEmpty(discreteVectorPars)) { + names(discreteVectorMapping) = getParamIds(discreteVectorPars, with.nr = TRUE, repeated = TRUE) } From 15b7e6c833a40436a9f85f729e61c3a8cfd05458 Mon Sep 17 00:00:00 2001 From: Jakob Richter Date: Mon, 19 Sep 2016 16:00:02 +0200 Subject: [PATCH 4/4] naming according to styleguide --- R/infillOptFocus.R | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/R/infillOptFocus.R b/R/infillOptFocus.R index d27cd76b2..6a1c91731 100644 --- a/R/infillOptFocus.R +++ b/R/infillOptFocus.R @@ -9,12 +9,11 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, design, iter, ...) { global.y = Inf - discreteVectorPars = filterParams(par.set, type = c("discretevector", "logicalvector")) + discrete.vector.pars = filterParams(par.set, type = c("discretevector", "logicalvector")) - allRequirements = extractSubList(par.set$pars, "requires", simplify = FALSE) - allRequirementVars = unique(unlist(lapply(allRequirements, all.vars))) - forbiddenRequirementVars = getParamIds(discreteVectorPars) - if (any(allRequirementVars %in% forbiddenRequirementVars)) { + all.requirements = extractSubList(par.set$pars, "requires", simplify = FALSE) + all.requirement.vars = unique(unlist(lapply(all.requirements, all.vars))) + if (any(all.requirement.vars %in% getParamIds(discrete.vector.pars))) { stop("Cannot do focus search when some variables have requirements that depend on discrete or logical vector parameters.") } @@ -27,20 +26,20 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig # Handle discrete vectors (and logical vectors): # The problem is that for discrete vectors, we can't adjust the range dimension-wise. # Instead we store the range of each discrete vectorparameter dimension in the list of named characters - # `discreteVectorMapping`. In each iteration a random value (that does not contain + # `discrete.vector.mapping`. In each iteration a random value (that does not contain # the optimum) is dropped from each vector on this list. The $values of the parameters in the parameterset also # need to be modified to reflect the reduced range: from them, always the last value is dropped. - # Then `discreteVectorMapping` is a mapping that maps, for each discrete vector param dimension + # Then `discrete.vector.mapping` is a mapping that maps, for each discrete vector param dimension # with originally n values, from the sampled value (levels 1 to n - #(dropped levels)) to the acutal levels with # random dropouts. # # Since the requirements of the param set are queried while generating the design, this breaks if # there are requirements depending on discrete vector parameters. - discreteVectorMapping = lapply(discreteVectorPars$pars, + discrete.vector.mapping = lapply(discrete.vector.pars$pars, function(param) rep(list(setNames(names(param$values), names(param$values))), param$len)) - discreteVectorMapping = unlist(discreteVectorMapping, recursive=FALSE) - if (!isEmpty(discreteVectorPars)) { - names(discreteVectorMapping) = getParamIds(discreteVectorPars, with.nr = TRUE, repeated = TRUE) + discrete.vector.mapping = unlist(discrete.vector.mapping, recursive=FALSE) + if (!isEmpty(discrete.vector.pars)) { + names(discrete.vector.mapping) = getParamIds(discrete.vector.pars, with.nr = TRUE, repeated = TRUE) } @@ -53,8 +52,8 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig newdesign = convertDataFrameCols(newdesign, ints.as.num = TRUE, logicals.as.factor = TRUE) # handle discrete vectors - for (dfindex in names(discreteVectorMapping)) { - mapping = discreteVectorMapping[[dfindex]] + for (dfindex in names(discrete.vector.mapping)) { + mapping = discrete.vector.mapping[[dfindex]] levels(newdesign[[dfindex]]) = mapping[levels(newdesign[[dfindex]])] } @@ -102,7 +101,7 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig par$values[[to.del]] = NULL } else { # we remove the last element of par$values and a random element for - # each dimension in discreteVectorMapping. + # each dimension in discrete.vector.mapping. par$values = par$values[-length(par$values)] if (par$type != "logicalvector") { # for discretevectorparam val would be a list; convert to character vector @@ -110,12 +109,12 @@ infillOptFocus = function(infill.crit, models, control, par.set, opt.path, desig } for (dimnum in seq_len(par$len)) { dfindex = paste0(par$id, dimnum) - newmap = val.names = discreteVectorMapping[[dfindex]] + newmap = val.names = discrete.vector.mapping[[dfindex]] val.names = val.names[val.names != val[dimnum]] to.del = sample(val.names, 1) newmap = newmap[newmap != to.del] names(newmap) = names(par$values) - discreteVectorMapping[[dfindex]] <<- newmap + discrete.vector.mapping[[dfindex]] <<- newmap } } }