Abstraction for resampling strategies. Predefined resamplings are stored in mlr_resamplings.

Format

R6::R6Class object.

Construction

r = Resampling$new(id, param_set, param_vals)

Fields

  • id :: character(1)
    Stores the identifier of the resampling strategy.

  • param_set :: paradox::ParamSet
    Description of available hyperparameters and hyperparameter settings.

  • hash :: character(1)
    Hash (unique identifier) for this object.

  • instance :: any
    During instantiate(), the instance is stored in this slot. Types vary from resampling strategy to resampling strategy.

  • is_instantiated :: logical(1)
    Is TRUE if the resampling has been instantiated.

  • duplicated_ids :: logical(1)
    Is TRUE if this resampling strategy may have duplicated row ids in a single training set or test set. E.g., this is TRUE for Bootstrap, and FALSE for cross validation.

  • iters :: integer(1)
    Returns the number of resampling iterations, depending on the values stored in the param_set.

  • stratify :: character()
    Subset of target and feature names of the Task. Used to stratify during r$instantiate().

  • task_hash :: character(1)
    The hash of the task which was passed to r$instantiate().

Methods

See also

Other Resampling: mlr_resamplings

Examples

r = mlr_resamplings$get("subsampling")

# Default parametrization
r$param_set$values
#> $repeats
#> [1] 30
#>
#> $ratio
#> [1] 0.6666667
#>
# Do only 3 repeats on 10% of the data
r$param_set$values = list(ratio = 0.1, repeats = 3)
r$param_set$values
#> $ratio
#> [1] 0.1
#>
#> $repeats
#> [1] 3
#>
# Instantiate on iris task
task = mlr_tasks$get("iris")
r$instantiate(task)

# Extract train/test sets
train_set = r$train_set(1)
print(train_set)
#> [1] 90 119 103 71 69 150 65 87 1 141 57 17 120 75 138
intersect(train_set, r$test_set(1))
#> integer(0)
# Another example: 10-fold CV
r = mlr_resamplings$get("cv")$instantiate(task)
r$train_set(1)
#> [1] 9 10 40 55 59 66 80 87 90 114 115 124 127 132 135 7 12 14
#> [19] 17 20 35 52 67 71 73 77 110 134 142 149 5 8 21 31 36 41
#> [37] 43 63 76 79 82 112 144 147 150 24 30 46 57 62 68 74 85 89
#> [55] 94 100 101 117 118 140 29 33 38 50 53 64 72 105 109 113 119 123
#> [73] 125 129 136 27 32 47 49 61 81 97 98 103 104 111 131 133 138 139
#> [91] 4 11 22 23 25 45 60 65 78 88 92 99 102 122 130 3 13 15
#> [109] 18 26 37 48 58 69 86 91 93 107 121 126 1 2 39 42 44 51
#> [127] 54 56 83 96 108 128 137 141 148
# Stratification
task = mlr_tasks$get("pima")
prop.table(table(task$truth())) # moderately unbalanced
#>
#>       neg       pos
#> 0.6510417 0.3489583
r = mlr_resamplings$get("subsampling")
r$stratify = task$target_names # stratify on target column
r$instantiate(task)
prop.table(table(task$truth(r$train_set(1)))) # roughly same proportion
#>
#>       neg       pos
#> 0.6503906 0.3496094
prop.table(table(task$truth(r$train_set(1)))) # roughly same proportion # FIXME why two times?
#>
#>       neg       pos
#> 0.6503906 0.3496094