[R-package] miscellaneous changes to comply with CRAN requirements (#…

…3338) * [R-package] update DESCRIPTION per CRAN comments * newlines * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> * more fixes * update Rbuildignore * more changes * more changes per CRAN response * add email * run examples in CI * add newest CRAN response * add Solaris patch * update patch * another attempt at ifaddrs patch * fix unnecessary comment * update configure * comments * bump version * tabs * fix address alignment, required by cran (#3415) * fix dataset binary file alignment * many fixes * fix warnings * fix bug * Update file_io.cpp * Update file_io.cpp * simplify code * Apply suggestions from code review * general * remove unneeded alignment * Update file_io.h * int32 to byte8 alignment * Apply suggestions from code review * Apply suggestions from code review * [R-package] add new copyright holder in DESCRIPTION (#3409) * [R-package] add new copyright holder in DESCRIPTION * fix role * fixing conflicts * [R-package] add new copyright holder in DESCRIPTION (#3409) * [R-package] add new copyright holder in DESCRIPTION * fix role * trying to fix conflicts * more fixes * this will work * update cran-comments * simplify solaris, add more testing docs * stuff * remove rchck docs * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> * remove extra use of cat() * change solaris check * update docs * remove testing code * fix warning about cleanup not having execute permissions * fix cmake builds * remove blank line Co-authored-by: Nikita Titov <nekit94-08@mail.ru> Co-authored-by: Guolin Ke <guolin.ke@outlook.com>
microsoft · Oct 8, 2020 · 186711d · 186711d
1 parent 7a51ae0
commit 186711d
Show file tree

Hide file tree

Showing 61 changed files with 706 additions and 104 deletions.
diff --git a/.ci/lint_r_code.R b/.ci/lint_r_code.R
@@ -53,7 +53,8 @@ LINTERS_TO_USE <- list(
     , "true_false"           = lintr::T_and_F_symbol_linter
     , "undesirable_function" = lintr::undesirable_function_linter(
         fun = c(
-            "cbind" = paste0(
+            "cat" = "CRAN forbids the use of cat() in packages except in special cases. Use message() or warning()."
+            , "cbind" = paste0(
                 "cbind is an unsafe way to build up a data frame. merge() or direct "
                 , "column assignment is preferred."
             )
@@ -85,7 +86,7 @@ LINTERS_TO_USE <- list(
     , "unneeded_concatenation" = lintr::unneeded_concatenation_linter
 )
 
-cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
+print(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
 
 results <- NULL
 

diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh
@@ -165,7 +165,7 @@ check_succeeded="yes"
 (
     R CMD check ${PKG_TARBALL} \
         --as-cran \
-        --run-dontrun \
+        --run-donttest \
     || check_succeeded="no"
 ) &
 

diff --git a/.ci/test_r_package_windows.ps1 b/.ci/test_r_package_windows.ps1
@@ -147,9 +147,9 @@ if ($env:COMPILER -ne "MSVC") {
   Write-Output "Running R CMD check"
   if ($env:R_BUILD_TYPE -eq "cran") {
     # CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit)
-    $check_args = "c('CMD', 'check', '--as-cran', '--run-dontrun', '$PKG_FILE_NAME')"
+    $check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')"
   } else {
-    $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-dontrun', '$PKG_FILE_NAME')"
+    $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')"
   }
   Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE, error_on_status = TRUE)" ; $check_succeeded = $?
 

diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore
@@ -3,7 +3,7 @@ AUTOCONF_UBUNTU_VERSION
 ^.*\.bin
 ^build_r.R$
 ^cran-comments\.md$
-^docs/.*$
+^docs$
 ^.*\.dll
 \.gitkeep$
 ^.*\.history

diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
@@ -6,11 +6,25 @@ Date: ~~DATE~~
 Authors@R: c(
     person("Guolin", "Ke", email = "guolin.ke@microsoft.com", role = c("aut", "cre")),
     person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("aut")),
-    person("Yachen", "Yan", role = c("ctb")),
     person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")),
+    person("Qi", "Meng", role = c("aut")),
+    person("Thomas", "Finley", role = c("aut")),
+    person("Taifeng", "Wang", role = c("aut")),
+    person("Wei", "Chen", role = c("aut")),
+    person("Weidong", "Ma", role = c("aut")),
+    person("Qiwei", "Ye", role = c("aut")),
+    person("Tie-Yan", "Liu", role = c("aut")),
+    person("Yachen", "Yan", role = c("ctb")),
+    person("Microsoft Corporation", role = c("cph")),
+    person("Dropbox, Inc.", role = c("cph")),
+    person("Jay", "Loden", role = c("cph")),
+    person("Dave", "Daeschler", role = c("cph")),
+    person("Giampaolo", "Rodola", role = c("cph")),
     person("IBM Corporation", role = c("ctb"))
     )
-Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, and this package offers an R interface to work with it.
+Description: Tree based algorithms can be improved by introducing boosting frameworks. 
+    'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) <https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision>.
+    This package offers an R interface to work with it.
     It is designed to be distributed and efficient with the following advantages:
         1. Faster training speed and higher efficiency.
         2. Lower memory usage.

diff --git a/R-package/LICENSE b/R-package/LICENSE
@@ -1,21 +1,2 @@
-The MIT License (MIT)
-
-Copyright (c) Microsoft Corporation
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+YEAR: 2016
+COPYRIGHT HOLDER: Microsoft Corporation
diff --git a/R-package/R/callback.R b/R-package/R/callback.R
@@ -150,7 +150,7 @@ merge.eval.string <- function(env) {
 
   }
 
-  paste0(msg, collapse = "\t")
+  paste0(msg, collapse = "  ")
 
 }
 
@@ -173,7 +173,7 @@ cb.print.evaluation <- function(period = 1L) {
 
         # Check if message is existing
         if (nchar(msg) > 0L) {
-          cat(merge.eval.string(env = env), "\n")
+          print(merge.eval.string(env = env))
         }
 
       }
@@ -284,7 +284,12 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose =
 
     # Check if verbose or not
     if (isTRUE(verbose)) {
-      cat("Will train until there is no improvement in ", stopping_rounds, " rounds.\n\n", sep = "")
+      msg <- paste0(
+        "Will train until there is no improvement in "
+        , stopping_rounds
+        , " rounds."
+      )
+      print(msg)
     }
 
     # Internally treat everything as a maximization task
@@ -359,8 +364,7 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose =
             # Print message if verbose
             if (isTRUE(verbose)) {
 
-              cat("Early stopping, best iteration is:", "\n")
-              cat(best_msg[[i]], "\n")
+              print(paste0("Early stopping, best iteration is: ", best_msg[[i]]))
 
             }
 
@@ -380,8 +384,7 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose =
 
         # Print message if verbose
         if (isTRUE(verbose)) {
-          cat("Did not meet early stopping, best iteration is:", "\n")
-          cat(best_msg[[i]], "\n")
+          print(paste0("Did not meet early stopping, best iteration is: ", best_msg[[i]]))
         }
 
         # Store best iteration and stop

diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R
@@ -721,7 +721,7 @@ Booster <- R6::R6Class(
 #'         number of columns corresponding to the number of trees.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -780,7 +780,7 @@ predict.lgb.Booster <- function(object,
 #' @return lgb.Booster
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -840,7 +840,7 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
 #' @return lgb.Booster
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' library(lightgbm)
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
@@ -886,7 +886,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
 #' @return json format of model
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' library(lightgbm)
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
@@ -930,10 +930,10 @@ lgb.dump <- function(booster, num_iteration = NULL) {
 #'              (the default), evaluation results for all iterations will be returned.
 #' @param is_err TRUE will return evaluation error instead
 #'
-#' @return vector of evaluation result
+#' @return numeric vector of evaluation result
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' # train a regression model
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train

diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R
@@ -720,7 +720,7 @@ Dataset <- R6::R6Class(
 #' @return constructed dataset
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -766,7 +766,7 @@ lgb.Dataset <- function(data,
 #' @return constructed dataset
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -793,12 +793,13 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) {
 #' @param dataset Object of class \code{lgb.Dataset}
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
 #' lgb.Dataset.construct(dtrain)
 #' }
+#' @return constructed dataset
 #' @export
 lgb.Dataset.construct <- function(dataset) {
 
@@ -824,7 +825,7 @@ lgb.Dataset.construct <- function(dataset) {
 #' be directly used with an \code{lgb.Dataset} object.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -858,7 +859,7 @@ dim.lgb.Dataset <- function(x, ...) {
 #' Since row names are irrelevant, it is recommended to use \code{colnames} directly.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -869,6 +870,7 @@ dim.lgb.Dataset <- function(x, ...) {
 #' print(dtrain, verbose = TRUE)
 #' }
 #' @rdname dimnames.lgb.Dataset
+#' @return A list with the dimension names of the dataset
 #' @export
 dimnames.lgb.Dataset <- function(x) {
 
@@ -883,6 +885,7 @@ dimnames.lgb.Dataset <- function(x) {
 }
 
 #' @rdname dimnames.lgb.Dataset
+#' @return A list with the dimension names of the dataset
 #' @export
 `dimnames<-.lgb.Dataset` <- function(x, value) {
 
@@ -929,7 +932,7 @@ dimnames.lgb.Dataset <- function(x) {
 #' @return constructed sub dataset
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -944,6 +947,7 @@ slice <- function(dataset, ...) {
 }
 
 #' @rdname slice
+#' @return constructed sub dataset
 #' @export
 slice.lgb.Dataset <- function(dataset, idxset, ...) {
 
@@ -976,7 +980,7 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
 #' }
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -994,6 +998,7 @@ getinfo <- function(dataset, ...) {
 }
 
 #' @rdname getinfo
+#' @return info data
 #' @export
 getinfo.lgb.Dataset <- function(dataset, name, ...) {
 
@@ -1013,7 +1018,7 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
 #' @param name the name of the field to get
 #' @param info the specific field of information to set
 #' @param ... other parameters
-#' @return passed object
+#' @return the dataset you passed in
 #'
 #' @details
 #' The \code{name} field can be one of the following:
@@ -1029,7 +1034,7 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
 #' }
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -1047,6 +1052,7 @@ setinfo <- function(dataset, ...) {
 }
 
 #' @rdname setinfo
+#' @return the dataset you passed in
 #' @export
 setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
 
@@ -1066,10 +1072,10 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) {
 #' @param categorical_feature categorical features. This can either be a character vector of feature
 #'                            names or an integer vector with the indices of the features (e.g.
 #'                            \code{c(1L, 10L)} to say "the first and tenth columns").
-#' @return passed dataset
+#' @return the dataset you passed in
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -1097,10 +1103,10 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
 #' @param dataset object of class \code{lgb.Dataset}
 #' @param reference object of class \code{lgb.Dataset}
 #'
-#' @return passed dataset
+#' @return the dataset you passed in
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package ="lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -1129,10 +1135,10 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
 #' @param dataset object of class \code{lgb.Dataset}
 #' @param fname object filename of output file
 #'
-#' @return passed dataset
+#' @return the dataset you passed in
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)

diff --git a/R-package/R/lgb.convert_with_rules.R b/R-package/R/lgb.convert_with_rules.R
@@ -68,7 +68,7 @@
 #'         \code{lgb.Dataset}.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(iris)
 #'
 #' str(iris)

diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
@@ -52,7 +52,7 @@ CVBooster <- R6::R6Class(
 #' @return a trained model \code{lgb.CVBooster}.
 #'
 #' @examples
-#' \dontrun{
+#' \donttest{
 #' data(agaricus.train, package = "lightgbm")
 #' train <- agaricus.train
 #' dtrain <- lgb.Dataset(train$data, label = train$label)
@@ -466,7 +466,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {
 
     # When doing group, stratified is not possible (only random selection)
     if (nfold > length(group)) {
-      stop("\n\tYou requested too many folds for the number of available groups.\n")
+      stop("\nYou requested too many folds for the number of available groups.\n")
     }
 
     # Degroup the groups