diff --git a/DESCRIPTION b/DESCRIPTION index 882df32c..12d42364 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -65,5 +65,5 @@ Suggests: DSOpal, DSMolgenisArmadillo, DSLite -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index d737d5e6..255bd61f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ export(ds.dataFrame) export(ds.dataFrameFill) export(ds.dataFrameSort) export(ds.dataFrameSubset) +export(ds.date) export(ds.densityGrid) export(ds.dim) export(ds.dmtC2S) @@ -83,6 +84,7 @@ export(ds.mice) export(ds.names) export(ds.ns) export(ds.numNA) +export(ds.predict) export(ds.qlspline) export(ds.quantileMean) export(ds.rBinom) @@ -96,9 +98,12 @@ export(ds.recodeLevels) export(ds.recodeValues) export(ds.rep) export(ds.replaceNA) +export(ds.resPlot) export(ds.rm) +export(ds.round) export(ds.rowColCalc) export(ds.sample) +export(ds.scale) export(ds.scatterPlot) export(ds.seq) export(ds.setSeed) diff --git a/R/ds.date.R b/R/ds.date.R new file mode 100644 index 00000000..00205a5b --- /dev/null +++ b/R/ds.date.R @@ -0,0 +1,325 @@ +#' +#' @title Creates date objects using a server-side object +#' @description Generates objects using a server-side object, which can be either a vector or +#' a data-frame column. Supports three operations: +#' 1. Extract components of a date (\code{extractdate}) +#' 2. Combine numeric year, month, and day into a full date (\code{makedate}) +#' 3. Compute the time interval between two dates (\code{timebetween}) +#' +#' @details +#' If the input is a data-frame column, it must be provided in the \code{x} argument as data-frame$column. +#' Inputs for \code{extractdate} and \code{timebetween} must be date objects. +#' For \code{makedate}, three numeric vectors (year, month, day) must be provided in the correct order. 
+#' The \code{add.column} argument determines whether the result is added as a new column in the existing
+#' data-frame (\code{TRUE}), or created as a new server-side object (\code{FALSE}).
+#' For \code{timebetween}, \code{months(1)} and \code{years(1)} are calendar periods (from the \code{lubridate} package).
+#' The expression \code{interval() \%/\% months(1)} counts whole calendar months between two dates.
+#' Examples: Jan 31 → Feb 2 = 0 months; Jan 31 → Mar 2 = 1 month.
+#' With \code{days(1)}, it counts fixed 24-hour durations.
+#' Note: \code{add.column = TRUE} is only valid for data-frame inputs.
+#'
+#' Server function called: \code{dateDS}
+#'
+#' @param x Character vector specifying the server-side object(s). For data-frame columns, use the format \code{df$column}.
+#' @param type Character string specifying the operation: \code{"extractdate"}, \code{"makedate"}, or \code{"timebetween"}.
+#' @param newobj Character string for the name of the object that will be created on the server. Default is \code{"date.result"}.
+#' @param unit Character string specifying the unit for \code{extractdate} or \code{timebetween}: \code{"days"}, \code{"months"}, or \code{"years"}.
+#' @param add.column Logical. If \code{FALSE}, the result is created as a new server-side object;
+#' if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.
+#' @param datasources A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
+#' If the \code{datasources} argument is not specified the default set of connections will be used:
+#' see \code{\link[DSI]{datashield.connections_default}}.
+#' +#' +#' @examples +#' +#' \dontrun{ +#' +#' require('DSI') +#' require('DSOpal') +#' require('dsBaseClient') +#' +#' builder <- DSI::newDSLoginBuilder() +#' builder$append(server = "study1", +#' url = "https://opal-demo.obiba.org", +#' user = "dsuser", password = "P@ssw0rd", +#' table = "GWAS.ega_phenotypes_1", driver = "OpalDriver") +#' builder$append(server = "study2", +#' url = "https://opal-demo.obiba.org", +#' user = "dsuser", password = "P@ssw0rd", +#' table = "GWAS.ega_phenotypes_2", driver = "OpalDriver") +#' +#' logindata <- builder$build() +#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") +#' +#' ds.make(toAssign = "D$date_diagnosis", +#' newobj = 'diagnosis_date', datasources = connections) +#' ds.date(x="D$date_diagnosis", type = "extractdate", +#' newobj = "diag_month", unit = "months", add.column = TRUE) +#' ds.date(x="D$date_diagnosis", type = "extractdate", +#' newobj = "diag_day", unit = "days", add.column = TRUE) +#' +#' +#' # Example 1: Create a new object by extracting the year from an object +#' ds.date(x="diagnosis_date", type = "extractdate", +#' newobj = "diagnosis_year", unit = "years", add.column = FALSE) +#' +#' # Example 2: Create a new column by extracting year from an object. This will result in an error since +#' # creating a new column option requires a dataframe input. +#' ds.date(x="diagnosis_date", type = "extractdate", +#' newobj = "diagnosis_year", unit = "years", add.column = TRUE) +#' +#' # Example 3: Create a new date column by combining 3 objects: 2 columns and 1 vector. +#' ds.date(x=c("diagnosis_year", "D$diag_month", "D$diag_day"), type = "makedate", +#' newobj = "combined_date", add.column = TRUE) +#' +#' # Example 4: Create a new object by calculating time between one column and one object in months. 
#' ds.date(x=c("diagnosis_date", "D$date_death"), type = "timebetween",
#'         newobj = "timebetween.months", unit = "months", add.column = FALSE)
#'
#' # Clear the Datashield R sessions and logout
#' datashield.logout(connections)
#' }
#'
#' @author Zulal Bekerecioglu
#' @export
#'
ds.date <- function(x=NULL, type=c("extractdate", "makedate", "timebetween"),
                    unit=c("days", "months", "years"), add.column=FALSE,
                    newobj="date.result", datasources = NULL) {

  # look for DS connections
  if (is.null(datasources)) {
    datasources <- datashield.connections_find()
  }

  # ensure datasources is a list of DSConnection-class
  if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
    stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
  }

  unit <- match.arg(unit)

  # match.arg() both validates 'type' and collapses the length-3 default to a
  # single value. The previous manual '%in%' test ran `if` on the full default
  # vector, which errors under R >= 4.2 ("the condition has length > 1");
  # match.arg() is also how the sibling functions (e.g. ds.round) validate type.
  type <- match.arg(type)

  # if x is empty, throw an error
  if (is.null(x)) {
    stop("Argument 'x' cannot be NULL. Please provide an object name or column name(s).")
  }

  # If add.column is TRUE, then inputs must have at least one column, i.e. one element should have $.
  # If there are multiple columns, then the df must be shared; its name is kept in 'common_df'.
  if (add.column) {
    error_message <- "Input object not valid: when 'add.column' is TRUE, at least one element must be a column.
    If multiple columns are specified, they must all belong to the same dataframe."

    # Identify elements that contain a $
    has_dollar <- grepl("\\$", x, perl = TRUE)

    # At least one element must contain $
    if (!any(has_dollar)) stop(error_message, call. = FALSE)

    # Multiple $ elements: check that the prefix before $ is identical
    if (sum(has_dollar) > 1) {
      prefixes <- vapply(strsplit(x[has_dollar], "\\$", perl = TRUE),
                         function(x) x[[1]], FUN.VALUE = character(1))
      if (length(unique(prefixes)) != 1) stop(error_message, call. = FALSE)

      common_df <- unique(prefixes)
    } else {
      # Single $ element: ensure both prefix and suffix are non-empty
      parts <- strsplit(x[has_dollar], "\\$", perl = TRUE)[[1]]
      if (length(parts) != 2 || !nzchar(parts[1]) || !nzchar(parts[2])) stop(error_message, call. = FALSE)

      common_df <- parts[1]
    }
  }

  # Build the server-side call string.
  # NOTE: 'add.column' is interpolated UNQUOTED so the server receives a
  # logical rather than the string "TRUE"/"FALSE"; this matches how ds.round
  # builds its call to roundDS().
  args <- c(
    sprintf('x = c(%s)', paste(sprintf('"%s"', x), collapse = ", ")),
    sprintf('type = "%s"', type),
    sprintf('newobj = "%s"', newobj),
    sprintf('add.column = %s', add.column),
    sprintf('unit = "%s"', unit)
  )

  cally <- paste0("dateDS(", paste(args, collapse = ", "), ")")

  # extractdate -------
  if (type == "extractdate") {
    # get a column with a date, extract year, month, or day according to the
    # specified unit ('unit' is already validated by match.arg() above)
    if (!add.column) {
      # save as a new object
      if (newobj == "newdate") {
        # add the unit after newdate for readability (newdate.months)
        newobj <- paste0(newobj, ".", unit)
      }
      DSI::datashield.assign(datasources, symbol = newobj, as.symbol(cally))
      message_text <- sprintf("Extracted %s and saved it as a new object named '%s'.",
                              unit, newobj)
    } else { # save as a new column
      if (newobj == "newdate") {
        newobj <- paste0(newobj, ".", unit)
      }
      DSI::datashield.assign(datasources, symbol = common_df, as.symbol(cally))
      message_text <- sprintf("Extracted %s from %s and added it as a new column named '%s'.",
                              unit, common_df, newobj)
    }
  }

  # makedate -------
  if (type == "makedate") {
    # get three columns in a list as 'year' 'month' 'day', combine them into one column
    if (!(is.character(x) && length(x) == 3)) {
      stop("For 'makedate', x must be a character vector of length 3 (year, month, day).")
    }

    if (!add.column) {
      # save as a new object
      DSI::datashield.assign(datasources, symbol = newobj, as.symbol(cally))
      message_text <- sprintf("Created a combined date and saved it as a new object named '%s'.",
                              newobj)
    } else { # save as a new column
      if (newobj == "newdate") {
        newobj <- paste0(newobj, ".", unit)
      }
      DSI::datashield.assign(datasources, symbol = common_df, as.symbol(cally))
      message_text <- sprintf("Created a combined date and added it as a new column in '%s' named '%s'.",
                              common_df, newobj)
    }
  }

  # timebetween --------
  if (type == "timebetween") {
    # get two columns as a list, calculate the time between them in 'unit'
    if (!(is.character(x) && length(x) == 2)) {
      stop("For 'timebetween', x must be a character vector of length 2 (start column, end column).")
    }

    if (!add.column) {
      # save as a new object
      DSI::datashield.assign(datasources, symbol = newobj, as.symbol(cally))
      message_text <- sprintf("Calculated the time difference and saved it as a new object named '%s'.",
                              newobj)
    } else { # save as a new column
      if (newobj == "newdate") {
        newobj <- paste0(newobj, ".", unit)
      }
      DSI::datashield.assign(datasources, symbol = common_df, as.symbol(cally))
      message_text <- sprintf("Calculated the time difference and added it as a new column in '%s' named '%s'.",
                              common_df, newobj)
    }
  }

  # Surface the per-branch summary; it was previously built but never shown.
  message(message_text)

  #############################################################################################################
  # Check that the object (or dataframe with new column) was successfully created on all servers
  #############################################################################################################

  test.obj.name <- if (!add.column) newobj else common_df

  # Run server-side object existence test
  calltext <- call("testObjExistsDS", test.obj.name)
  object.info <- DSI::datashield.aggregate(datasources, calltext)

  num.datasources <- length(object.info)
  obj.name.exists.in.all.sources <- TRUE
  obj.non.null.in.all.sources <- TRUE

  for (j in seq_len(num.datasources)) {
    if (!object.info[[j]]$test.obj.exists) {
      obj.name.exists.in.all.sources <- FALSE
    }
    if (is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)) {
      obj.non.null.in.all.sources <- FALSE
    }
  }

  if (obj.name.exists.in.all.sources && obj.non.null.in.all.sources) {
    return.message <- paste0("A data object <", test.obj.name, "> has been created in all specified data sources.")
  } else {
    return.message.1 <- paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources.")
    return.message.2 <- paste0("It is either ABSENT and/or has no valid content/class, see return.info above.")
    return.message.3 <- paste0("Please use ds.ls() or ds.names() to identify where missing.")
    return.message <- list(return.message.1, return.message.2, return.message.3)
  }

  # Check for study-side messages
  calltext <- call("messageDS", test.obj.name)
  studyside.message <- DSI::datashield.aggregate(datasources, calltext)

  no.errors <- TRUE
  for (nd in seq_len(num.datasources)) {
    if (studyside.message[[nd]] != "ALL OK: there are no studysideMessage(s) on this datasource.") {
      no.errors <- FALSE
    }
  }

  if (no.errors) {
    if (add.column) {
      # Check if the new column exists in the dataframe on all servers
      calltext.names <- call("namesDS", common_df)
      df.colnames <- DSI::datashield.aggregate(datasources, calltext.names)

      col.exists.in.all.sources <- TRUE
      for (j in seq_along(df.colnames)) {
        if (!(newobj %in% df.colnames[[j]])) {
          col.exists.in.all.sources <- FALSE
        }
      }

      if (col.exists.in.all.sources) {
        validity.check <- paste0("New column <", newobj, "> successfully added to dataframe <", common_df, "> in all sources.")
      } else {
        validity.check <- paste0("Warning: column <", newobj, "> not found in dataframe <", common_df, "> in one or more sources. Check with ds.names().")
      }
    } else {
      validity.check <- paste0("<", test.obj.name, "> appears valid in all sources.")
    }

    return(list(is.object.created = return.message,
                validity.check = validity.check))

  } else {
    validity.check <- paste0("<", test.obj.name, "> invalid in at least one source. See studyside.messages:")

    return(list(is.object.created = return.message,
                validity.check = validity.check,
                studyside.messages = studyside.message))
  }

  #############################################################################################################
  # End of check
  #############################################################################################################

}
#ds.date
\ No newline at end of file
diff --git a/R/ds.predict.R b/R/ds.predict.R
new file mode 100644
index 00000000..a0c7ccd3
--- /dev/null
+++ b/R/ds.predict.R
@@ -0,0 +1,221 @@
#'
#' @title ds.predict
#' @description Generates server-side predictions using the client-side output from \code{ds.glm}.
#'
#' @details
#' This function takes the client-side output from \code{ds.glm} and sends the necessary components
#' (coefficients, family, formula, and any categorical variables) to the server for prediction.
#'
#' Server function called: \code{predictDS}
#'
#' @param name The client-side return object from \code{ds.glm}.
#' @param newdataname A character string specifying the name of the new dataset to be used for predictions.
#' @param type A character string specifying the type of prediction. Options are \code{"response"} or \code{"link"}.
#' @param newobj A character string specifying the name of the output object created on the server.
#' Default is \code{"new.predictions"}.
#' @param traindataname A character string specifying the name of the dataset used for model training.
#' @param na.action A character string to specify the action to take if missing values are present. Default is \code{"na.pass"}.
#' @param datasources A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
#' If the \code{datasources} argument is not specified the default set of connections will be used:
#' see \code{\link[DSI]{datashield.connections_default}}.
#'
#'
#' @examples
#'
#' \dontrun{
#'
#' require('DSI')
#' require('DSOpal')
#' require('dsBaseClient')
#'
#' builder <- DSI::newDSLoginBuilder()
#' builder$append(server = "study1",
#'                url = "https://opal-demo.obiba.org",
#'                user = "dsuser", password = "P@ssw0rd",
#'                table = "CNSIM.CNSIM1", driver = "OpalDriver")
#' builder$append(server = "study2",
#'                url = "https://opal-demo.obiba.org",
#'                user = "dsuser", password = "P@ssw0rd",
#'                table = "CNSIM.CNSIM2", driver = "OpalDriver")
#' builder$append(server = "study3",
#'                url = "https://opal-demo.obiba.org",
#'                user = "dsuser", password = "P@ssw0rd",
#'                table = "CNSIM.CNSIM3", driver = "OpalDriver")
#' logindata <- builder$build()
#'
#' # Log onto the remote Opal training servers
#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
#'
#'
#' # Example: Fit the model using ds.glm for study1 and study2
#' fitted_model <- ds.glm(formula = "LAB_TSC ~ LAB_HDL + PM_BMI_CONTINUOUS * GENDER + MEDI_LPD",
#'                        data = "D", family = "gaussian", datasources = connections[c("study1", "study2")])
#'
#' # Predictions for study3
#' ds.predict(name = fitted_model, newdataname = "D", type = "response",
#'            traindataname="D", na.action="na.omit", datasources = connections["study3"])
#'
#'
#' # Clear the Datashield R sessions and logout
#' datashield.logout(connections)
#' }
#'
#' @author Zulal Bekerecioglu
#' @export
#'
ds.predict <- function(name = NULL, newdataname = NULL, type = c("response", "link"),
                       newobj = "new.predictions", traindataname = NULL,
                       na.action = "na.pass", datasources = NULL) {

  # look for DS connections
  if (is.null(datasources)) {
    datasources <- datashield.connections_find()
  }

  # ensure datasources is a list of DSConnection-class
  if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
    stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
  }

  type <- match.arg(type)

  # Ensure new data and training data names are provided
  if (is.null(newdataname)) {
    stop("The argument 'newdataname' cannot be empty. Please provide the name of the dataset for predictions.", call. = FALSE)
  }

  if (is.null(traindataname)) {
    stop("The argument 'traindataname' cannot be empty. Please provide the name of the training dataset.", call. = FALSE)
  }

  # Ensure model object name is provided
  if (is.null(name)) {
    stop("The argument 'name' cannot be empty. Please provide the ds.glm output object.", call. = FALSE)
  }

  # Sending necessary components to the server-side
  # Create a coefficients object from the model, numbers only
  ds.make(toAssign = paste0("c(", paste(name$coefficients[, 1], collapse = ", "), ")"),
          newobj = 'predictDS_coefficients', datasources = datasources)

  # Create family and link function object from the model
  ds.make(toAssign = paste0("\"", paste(name$family$family, "link", name$family$link, sep = "."), "\""),
          newobj = 'predictDS_family', datasources = datasources)

  # Create a formula object from the model
  ds.make(toAssign = paste0(name$formula),
          newobj = 'predictDS_formula', datasources = datasources)

  # Create a categorical_variables object from the model. Necessary for correct factoring!
  # Compare coefficient names from the model output with the main-effect terms
  # of the formula: a main effect absent from the coefficient names but a
  # prefix of one of them (Sexmale from Sex, Pclass2 from Pclass) is a factor.
  # If no factors are detected, a NULL placeholder is created instead.
  categorical_variables <- c()
  coefficient_names <- names(name$coefficients[, 1])
  formula_elements <- labels(stats::terms(stats::formula(name$formula)))
  main_effects <- formula_elements[!grepl(":", formula_elements)] # excluding terms with :

  for (element in main_effects) {
    if (!(element %in% coefficient_names)) {
      partial_match <- coefficient_names[startsWith(coefficient_names, element)]

      if (length(partial_match) > 0) {
        categorical_variables <- append(categorical_variables, element)
      }
    }
  }

  if (length(categorical_variables) > 0) {
    ds.make(toAssign = paste0("c(", paste0("'", categorical_variables, "'", collapse = ", "), ")"),
            newobj = 'predictDS_categorical_variables', datasources = datasources)
  } else {
    ds.make(toAssign = "NULL",
            newobj = 'predictDS_categorical_variables', datasources = datasources)
  }

  # Build the call string
  cally <- paste0(
    "predictDS(",
    "newdataname = '", newdataname, "', ",
    "traindataname = '", traindataname, "', ",
    "type = '", type, "', ",
    "na.action = '", na.action, "' ",
    ")"
  )

  DSI::datashield.assign(datasources, symbol = newobj, as.symbol(cally))

  # Remove the temporary server-side objects
  ds.rm(x.names = "predictDS_coefficients", datasources = datasources)
  ds.rm(x.names = "predictDS_family", datasources = datasources)
  ds.rm(x.names = "predictDS_formula", datasources = datasources)
  ds.rm(x.names = "predictDS_categorical_variables", datasources = datasources)

  #############################################################################################################
  # Check that the object was successfully created on all servers
  #############################################################################################################

  test.obj.name <- newobj

  # Run server-side object existence test
  calltext <- call("testObjExistsDS", test.obj.name)
  object.info <- DSI::datashield.aggregate(datasources, calltext)

  num.datasources <- length(object.info)
  obj.name.exists.in.all.sources <- TRUE
  obj.non.null.in.all.sources <- TRUE

  for (j in seq_len(num.datasources)) {
    if (!object.info[[j]]$test.obj.exists) {
      obj.name.exists.in.all.sources <- FALSE
    }
    if (is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)) {
      obj.non.null.in.all.sources <- FALSE
    }
  }

  if (obj.name.exists.in.all.sources && obj.non.null.in.all.sources) {
    return.message <- paste0("A data object <", test.obj.name, "> has been created in all specified data sources.")
  } else {
    return.message.1 <- paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources.")
    return.message.2 <- paste0("It is either ABSENT and/or has no valid content/class, see return.info above.")
    return.message.3 <- paste0("Please use ds.ls() or ds.names() to identify where missing.")
    return.message <- list(return.message.1, return.message.2, return.message.3)
  }

  # Check for study-side messages.
  # NOTE: the comparison string must carry the trailing period, exactly as the
  # sibling functions (ds.date, ds.round) test it; without it this check
  # reported every run as invalid.
  calltext <- call("messageDS", test.obj.name)
  studyside.message <- DSI::datashield.aggregate(datasources, calltext)

  no.errors <- all(unlist(studyside.message) == "ALL OK: there are no studysideMessage(s) on this datasource.")

  if (no.errors) {
    validity.check <- paste0("<", test.obj.name, "> appears valid in all sources.")

    return(list(is.object.created = return.message,
                validity.check = validity.check))

  } else {
    validity.check <- paste0("<", test.obj.name, "> invalid in at least one source. See studyside.messages:")

    return(list(is.object.created = return.message,
                validity.check = validity.check,
                studyside.messages = studyside.message))
  }

  #############################################################################################################
  # End of check
  #############################################################################################################

}
#ds.predict
diff --git a/R/ds.resPlot.R b/R/ds.resPlot.R
new file mode 100644
index 00000000..cab07f4b
--- /dev/null
+++ b/R/ds.resPlot.R
@@ -0,0 +1,148 @@
# naming for 'which' argument : https://stat.ethz.ch/R-manual/R-patched/library/stats/html/plot.lm.html

#'
#' @title ds.resPlot
#' @description Creates residual plots from \code{ds.glm}, using \code{ds.predict} and \code{ds.scatterPlot}.
#'
#' @details
#' This function generates residual plots (residuals vs fitted) and/or QQ-plots
#' for a model fitted with ds.glm, based on the `which` argument.
#'
#'
#' @param name The client-side return object from \code{ds.glm}.
#' @param traindataname The name of the dataset used to train the model.
#' @param which A numeric value deciding what type of plot to return.
#' 1 = residuals vs fitted plot, 2 = QQ plot, 0 = both (default).
#' @param pch Plotting symbol passed to the base graphics calls. Default is \code{1}.
#' @param col Plotting colour passed to the base graphics calls. Default is \code{"black"}.
#' @param lty Line type passed to the base graphics calls. Default is \code{1}.
#' @param datasources A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
#' If the \code{datasources} argument is not specified the default set of connections will be used:
#' see \code{\link[DSI]{datashield.connections_default}}.
#'
#'
#' @examples
#'
#' \dontrun{
#'
#' require('DSI')
#' require('DSOpal')
#' require('dsBaseClient')
#'
#' builder <- DSI::newDSLoginBuilder()
#' builder$append(server = "study1", url = "https://opal-demo.obiba.org",
#'                user = "dsuser", password = "P@ssw0rd",
#'                table = "CNSIM.CNSIM1", driver = "OpalDriver")
#' builder$append(server = "study2", url = "https://opal-demo.obiba.org",
#'                user = "dsuser", password = "P@ssw0rd",
#'                table = "CNSIM.CNSIM2", driver = "OpalDriver")
#' logindata <- builder$build()
#'
#' # Log onto the remote Opal training servers
#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
#'
#'
#' # Example 1: Fit the model using ds.glm for study1
#' fitted_model <- ds.glm(formula = "LAB_TSC ~ LAB_HDL + PM_BMI_CONTINUOUS * GENDER + MEDI_LPD",
#'                        data = "D", family = "gaussian", datasources = connections)
#'
#' # Residuals for the model
#' ds.resPlot(name = fitted_model, traindataname="D", datasources = connections)
#'
#'
#' # Clear the Datashield R sessions and logout
#' datashield.logout(connections)
#' }
#'
#' @author Zulal Bekerecioglu
#' @export
#'
#'
ds.resPlot <- function(name, traindataname, which=0, pch = 1, col = "black",
                       lty = 1, datasources=NULL) {

  # Map every accepted spelling of 'which' onto a canonical value
  which_values <- list("0" = "both", "both" = "both",
                       "1" = "res", "res" = "res",
                       "2" = "qq", "qq" = "qq")

  which <- tolower(as.character(which))

  # 'which' argument can only be 0, 1, or 2 (or their named aliases)
  if (!which %in% names(which_values)) {
    stop("Invalid `which` argument. Must be one of: 0 / 'both', 1 / 'res', 2 / 'qq'.")
  }

  which <- which_values[[which]]

  # look for DS connections
  if (is.null(datasources)) {
    datasources <- datashield.connections_find()
  }

  # ensure datasources is a list of DSConnection-class
  if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
    stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
  }

  # Ensure model object name is provided
  if (is.null(name)) {
    stop("The argument 'name' cannot be empty. Please provide the ds.glm output object.", call. = FALSE)
  }

  if (is.null(traindataname)) {
    stop("The argument 'traindataname' cannot be empty. Please provide the name of the training dataset.", call. = FALSE)
  }

  # Generate model predictions on the same training data.
  # BUG FIX: this call previously passed the global 'connections' object
  # instead of the 'datasources' argument, which broke whenever the caller's
  # login object had a different name or targeted a subset of servers.
  ds.predict(name = name, traindataname = traindataname, newdataname = traindataname,
             datasources = datasources, newobj = "prediction.for.res")

  # Extract outcome variable name from model formula (LHS of the formula)
  outcome_variable <- all.vars(stats::formula(name$formula)[[2]])

  # Compute residuals (Y - fitted) on the server
  ds.make(toAssign = paste0(traindataname, "$", outcome_variable, "-prediction.for.res"),
          newobj = 'residuals.for.plot', datasources = datasources)

  # Standardize residuals using each study's own residual standard deviation
  res_var <- ds.var(x = "residuals.for.plot", type = "split", datasources = datasources)
  res_sd <- sqrt(res_var[["Variance.by.Study"]][, "EstimatedVar"])
  names(res_sd) <- rownames(res_var[["Variance.by.Study"]])

  for (i in seq_along(datasources)) {
    ds.make(toAssign = paste0("residuals.for.plot / ", res_sd[i]),
            newobj = "std.residuals.for.plot",
            datasources = datasources[i])
  }

  # Get the anonymised scatterplot points for residuals. ds.scatterPlot draws
  # as a side effect, so its output is sent to a null device, which is closed
  # again once the coordinates have been captured (previously it leaked).
  grDevices::pdf(NULL)
  plottedpoints <- as.data.frame(ds.scatterPlot(x='prediction.for.res', y='std.residuals.for.plot', type='combine',
                                                datasources=datasources, return.coords=TRUE)[1])
  grDevices::dev.off()

  if (!interactive()) grDevices::pdf("resplots.pdf") else grDevices::dev.new()

  # Clean up the temporary server-side objects
  ds.rm(x.names = "prediction.for.res", datasources = datasources)
  ds.rm(x.names = "std.residuals.for.plot", datasources = datasources)
  ds.rm(x.names = "residuals.for.plot", datasources = datasources)

  # Rename columns
  names(plottedpoints) <- c("Fitted values", "Residuals")

  if (which %in% c("res", "both")) {
    plot(plottedpoints$"Fitted values", plottedpoints$Residuals, pch = pch,
         col = col, lty = lty, main = "Residuals vs Fitted",
         xlab = "Predicted values", ylab = "Residuals")

    abline(h = 0, lty = 2, col = "gray")

    lines(lowess(plottedpoints$"Fitted values", plottedpoints$Residuals), col = "red")
  }

  if (which %in% c("qq", "both")) {
    qqnorm(plottedpoints$Residuals, main = "Q-Q Plot of Residuals", pch = pch,
           col = col)
    qqline(plottedpoints$Residuals, col = "black", lty = 2)
  }

}
diff --git a/R/ds.round.R b/R/ds.round.R
new file mode 100644
index 00000000..3f5650e9
--- /dev/null
+++ b/R/ds.round.R
@@ -0,0 +1,204 @@
#'
#' @title ds.round
#' @description Generates objects using a server-side object, which can be either a vector or
#' a data-frame column. Supports five operations:
#' 1. (\code{round})
#' 2. (\code{ceiling})
#' 3. (\code{floor})
#' 4. (\code{trunc})
#' 5. (\code{signif})
#' where each function in baseR is applied on the server side to the specified object.
#'
#' @details
#' Note: \code{add.column = TRUE} is only valid for data-frame inputs.
#'
#' Server function called: \code{roundDS}
#'
#' @param x Character vector specifying the server-side object(s). For data-frame columns, use the format \code{df$column}.
#' @param type Character string specifying the operation: \code{"round"}, \code{"ceiling"}, \code{"floor"},
#' \code{trunc}, or \code{"signif"}.
#' @param digits Number of digits to be used in arguments \code{"round"} and \code{"signif"}.
#' @param add.column Logical.
If \code{FALSE}, the result is created as a new server-side object; +#' if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}. +#' @param newobj Character string for the name of the object that will be created on the server. Default is \code{"rounding.result"}. +#' @param datasources A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +#' If the \code{datasources} argument is not specified the default set of connections will be used: +#' see \code{\link[DSI]{datashield.connections_default}}. +#' +#' +#' @examples +#' +#' \dontrun{ +#' +#' require('DSI') +#' require('DSOpal') +#' require('dsBaseClient') +#' +#' builder <- DSI::newDSLoginBuilder() +#' builder$append(server = "study1", +#' url = "https://opal-demo.obiba.org", +#' user = "dsuser", password = "P@ssw0rd", +#' table = "DASIM.DASIM1", driver = "OpalDriver") +#' builder$append(server = "study2", +#' url = "https://opal-demo.obiba.org", +#' user = "dsuser", password = "P@ssw0rd", +#' table = "DASIM.DASIM2", driver = "OpalDriver") +#' logindata <- builder$build() +#' +#' # Log onto the remote Opal training servers +#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") +#' +#' ds.make(toAssign = "D$LAB_TSC", newobj = 'LAB.TSC.obj', datasources = connections) +#' +#' # Example 1: Give a numeric object, save as a new object +#' ds.round("LAB.TSC.obj", digits=2, add.column = FALSE) +#' +#' # Example 2: Give a column, save as a new column. 
+#' ds.round("D$LAB_HDL", type = "ceiling", newobj = "LAB_rounded_HDL", add.column = TRUE)
+#'
+#'
+#' # Clear the Datashield R sessions and logout
+#' datashield.logout(connections)
+#' }
+#'
+#' @author Zulal Bekerecioglu
+#' @export
+#'
+#'
+#'
+ds.round <- function(x=NULL, type=c("round", "ceiling", "floor", "trunc", "signif"), digits=0,
+                     add.column = FALSE, newobj = "rounding.result", datasources = NULL) {
+
+  # look for DS connections
+  if (is.null(datasources)) {
+    datasources <- datashield.connections_find()
+  }
+
+  # ensure datasources is a list of DSConnection-class
+  if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
+    stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
+  }
+
+  # if x is empty, throw an error
+  if (is.null(x)) {
+    stop("Argument 'x' cannot be NULL. Please provide an object name or column name(s).")
+  }
+
+  # match.arg() validates 'type' against the allowed choices and picks the
+  # first ("round") when the caller did not supply one
+  type <- match.arg(type)
+
+
+  # Build the call string
+  # Character arguments are quoted; 'digits' and 'add.column' are serialized
+  # unquoted so they arrive server-side as numeric/logical values.
+  args <- c(
+    sprintf('x = "%s"', x),
+    sprintf('type = "%s"', type),
+    sprintf('digits = %s', digits),
+    sprintf('add.column = %s', add.column),
+    sprintf('newobj = "%s"', newobj)
+  )
+
+  # as.symbol() on the full call text is the established dsBaseClient pattern
+  # for passing an assign-function call through DSI
+  cally <- paste0("roundDS(", paste(args, collapse = ", "), ")")
+
+  if(!add.column){
+    # Save the rounding result as a new object
+    DSI::datashield.assign(datasources, symbol = newobj, as.symbol(cally))
+
+  } else {
+    # If add.column is TRUE, first check whether the object is a column in a dataframe,
+    # if it is, save the result as a new column in that dataframe
+
+    # Check if x contains a $
+    if(!grepl("\\$", x)){
+      stop("The argument `x` is not a column. To save the result, either provide a column (e.g., df$colname) or set add.column = FALSE.", call. = FALSE)
+    } else {
+      # Extract dataframe name
+      dataframe_name <- strsplit(x, "\\$")[[1]][1]
+
+      # Assign as a column in the dataframe
+      # (re-assigning the whole data-frame symbol: the server-side roundDS is
+      # expected to return the data-frame extended with the new column)
+      DSI::datashield.assign(datasources, symbol = dataframe_name, as.symbol(cally))
+
+    }
+  }
+
+  #############################################################################################################
+  # Check that the object (or dataframe with new column) was successfully created on all servers
+  #############################################################################################################
+
+  test.obj.name <- if (!add.column) newobj else dataframe_name
+
+  # Run server-side object existence test
+  calltext <- call("testObjExistsDS", test.obj.name)
+  object.info <- DSI::datashield.aggregate(datasources, calltext)
+
+  num.datasources <- length(object.info)
+  obj.name.exists.in.all.sources <- TRUE
+  obj.non.null.in.all.sources <- TRUE
+
+  for (j in 1:num.datasources) {
+    if (!object.info[[j]]$test.obj.exists) {
+      obj.name.exists.in.all.sources <- FALSE
+    }
+    if (is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)) {
+      obj.non.null.in.all.sources <- FALSE
+    }
+  }
+
+  if (obj.name.exists.in.all.sources && obj.non.null.in.all.sources) {
+    return.message <- paste0("A data object <", test.obj.name, "> has been created in all specified data sources.")
+  } else {
+    return.message.1 <- paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources.")
+    return.message.2 <- paste0("It is either ABSENT and/or has no valid content/class, see return.info above.")
+    return.message.3 <- paste0("Please use ds.ls() or ds.names() to identify where missing.")
+    return.message <- list(return.message.1, return.message.2, return.message.3)
+  }
+
+  # Check for study-side messages
+  calltext <- call("messageDS", test.obj.name)
+  studyside.message <- DSI::datashield.aggregate(datasources, calltext)
+
+  no.errors <- TRUE
+  for (nd in 1:num.datasources) {
+    if (studyside.message[[nd]] != "ALL OK: there are no studysideMessage(s) on this datasource.") {
+      no.errors <- FALSE
+    }
+  }
+
+
+  if (no.errors) {
+    if (add.column) {
+      # Check if the new column exists in the dataframe on all servers
+      # NOTE(review): assumes server-side roundDS names the new column <newobj> — verify
+      calltext.names <- call("namesDS", dataframe_name)
+      df.colnames <- DSI::datashield.aggregate(datasources, calltext.names)
+
+      col.exists.in.all.sources <- TRUE
+      for (j in 1:length(df.colnames)) {
+        if (!(newobj %in% df.colnames[[j]])) {
+          col.exists.in.all.sources <- FALSE
+        }
+      }
+
+      if (col.exists.in.all.sources) {
+        validity.check <- paste0("New column <", newobj, "> appears valid in all sources.")
+      } else {
+        validity.check <- paste0("Warning: column <", newobj, "> not found in dataframe <", dataframe_name, "> in one or more sources. Check with ds.names().")
+      }
+    } else {
+      validity.check <- paste0("<", test.obj.name, "> appears valid in all sources.")
+    }
+
+    return(list(is.object.created = return.message,
+                validity.check = validity.check))
+
+  } else {
+    validity.check <- paste0("<", test.obj.name, "> invalid in at least one source. See studyside.messages:")
+
+    return(list(is.object.created = return.message,
+                validity.check = validity.check,
+                studyside.messages = studyside.message))
+  }
+
+  #############################################################################################################
+  # End of check
+  #############################################################################################################
+
+}
+#ds.round
diff --git a/R/ds.scale.R b/R/ds.scale.R
new file mode 100644
index 00000000..d31d029c
--- /dev/null
+++ b/R/ds.scale.R
@@ -0,0 +1,196 @@
+#'
+#' @title Standardizes a server-side vector
+#' @description Scales / standardizes a server-side vector using the scale function
+#'
+#' @details
+#' Note: \code{add.column = TRUE} is only valid for data-frame inputs.
+#'
+#' Server function called: \code{scaleDS}
+#'
+#' @param x A character string specifying the server-side vector. For data-frame columns, use the format \code{df$column}.
+#' @param newobj A character string for the name of the object that will be created on the server. Default is \code{"scaled.data"}.
+#' @param add.column Logical. If \code{FALSE}, the result is created as a new server-side object;
+#' if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.
+#' @param datasources A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
+#' If the \code{datasources} argument is not specified the default set of connections will be used:
+#' see \code{\link[DSI]{datashield.connections_default}}.
+#'
+#'
+#'
+#' @examples
+#'
+#' \dontrun{
+#'
+#'   require('DSI')
+#'   require('DSOpal')
+#'   require('dsBaseClient')
+#'
+#'   builder <- DSI::newDSLoginBuilder()
+#'   builder$append(server = "study1",
+#'                  url = "https://opal-demo.obiba.org",
+#'                  user = "dsuser", password = "P@ssw0rd",
+#'                  table = "GWAS.ega_phenotypes_1", driver = "OpalDriver")
+#'   builder$append(server = "study2",
+#'                  url = "https://opal-demo.obiba.org",
+#'                  user = "dsuser", password = "P@ssw0rd",
+#'                  table = "GWAS.ega_phenotypes_2", driver = "OpalDriver")
+#'   logindata <- builder$build()
+#'
+#'   # Log onto the remote Opal training servers
+#'   connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+#'
+#'   ds.make(toAssign = "D$energy", newobj = 'energy.obj', datasources = connections)
+#'
+#'   # Example 1: Give a column, save as a new column
+#'   ds.scale(x="D$age_recruitment", newobj="scaled.age.recruitment", add.column=TRUE)
+#'
+#'   # Example 2: Give a numeric object, save as a new object
+#'   ds.scale(x="energy.obj", newobj="scaled.energy", add.column=FALSE)
+#'
+#'
+#'   # Clear the Datashield R sessions and logout
+#'   datashield.logout(connections)
+#' }
+#'
+#' @author Zulal Bekerecioglu
+#' @export
+#'
+
+ds.scale <- function(x=NULL, newobj="scaled.data", add.column=FALSE, datasources = NULL) {
+
+  # look for DS connections
+  if (is.null(datasources)) {
+    datasources <- datashield.connections_find()
+  }
+
+  # ensure datasources is a list of DSConnection-class
+  if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
+    stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
+  }
+
+  # if x is empty, throw an error
+  if (is.null(x)) {
+    stop("Argument 'x' cannot be NULL. Please provide an object name or column name(s).")
+  }
+
+  # Build the call string.
+  # FIX: 'add.column' is serialized unquoted (consistent with ds.round) so the
+  # server-side scaleDS receives the logical TRUE/FALSE rather than the
+  # character string "TRUE"/"FALSE".
+  args <- c(
+    sprintf('x = "%s"', x),
+    sprintf('newobj = "%s"', newobj),
+    sprintf('add.column = %s', add.column)
+  )
+
+
+  cally <- paste0("scaleDS(", paste(args, collapse = ", "), ")")
+
+  # A "$" in x means a data-frame column was supplied
+  is_dataframe <- grepl("\\$", x)
+
+  if(is_dataframe) {
+    # Extract data-frame and column name
+    dataframe_name <- strsplit(x, "\\$")[[1]][1]
+    column_name <- strsplit(x, "\\$")[[1]][2]
+  }
+
+  # Data-frame object
+  # Save as a new object
+  if(!add.column&&is_dataframe){
+    DSI::datashield.assign(datasources, symbol = newobj, as.symbol(cally))
+
+
+  } else if(add.column&&is_dataframe) { # Save as a new column
+    DSI::datashield.assign(datasources, symbol = dataframe_name, as.symbol(cally))
+
+
+  } else if(!add.column&&!is_dataframe) { # Vector object, only saving as a new object is valid
+    DSI::datashield.assign(datasources, symbol = newobj, as.symbol(cally))
+
+
+  } else {
+    # add.column = TRUE with a non-data-frame input is invalid.
+    # FIX: message is now a single line (the original string literal spanned two
+    # source lines, embedding a raw newline and indentation into the error text).
+    stop("The argument 'x' is not a column. To save the result, either provide a column (e.g., df$colname) or set add.column = FALSE.", call. = FALSE)
+
+  }
+
+  #############################################################################################################
+  # Check that the object (or dataframe with new column) was successfully created on all servers
+  #############################################################################################################
+
+  test.obj.name <- if (!add.column) newobj else dataframe_name
+
+  # Run server-side object existence test
+  calltext <- call("testObjExistsDS", test.obj.name)
+  object.info <- DSI::datashield.aggregate(datasources, calltext)
+
+  num.datasources <- length(object.info)
+  obj.name.exists.in.all.sources <- TRUE
+  obj.non.null.in.all.sources <- TRUE
+
+  for (j in 1:num.datasources) {
+    if (!object.info[[j]]$test.obj.exists) {
+      obj.name.exists.in.all.sources <- FALSE
+    }
+    if (is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)) {
+      obj.non.null.in.all.sources <- FALSE
+    }
+  }
+
+  if (obj.name.exists.in.all.sources && obj.non.null.in.all.sources) {
+    return.message <- paste0("A data object <", test.obj.name, "> has been created in all specified data sources.")
+  } else {
+    return.message.1 <- paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources.")
+    return.message.2 <- paste0("It is either ABSENT and/or has no valid content/class, see return.info above.")
+    return.message.3 <- paste0("Please use ds.ls() or ds.names() to identify where missing.")
+    return.message <- list(return.message.1, return.message.2, return.message.3)
+  }
+
+  # Check for study-side messages
+  calltext <- call("messageDS", test.obj.name)
+  studyside.message <- DSI::datashield.aggregate(datasources, calltext)
+
+  no.errors <- TRUE
+  for (nd in 1:num.datasources) {
+    if (studyside.message[[nd]] != "ALL OK: there are no studysideMessage(s) on this datasource.") {
+      no.errors <- FALSE
+    }
+  }
+
+
+  if (no.errors) {
+    if (add.column) {
+      # Check if the new column exists in the dataframe on all servers
+      calltext.names <- call("namesDS", dataframe_name)
+      df.colnames <- DSI::datashield.aggregate(datasources, calltext.names)
+
+      col.exists.in.all.sources <- TRUE
+      for (j in 1:length(df.colnames)) {
+        if (!(newobj %in% df.colnames[[j]])) {
+          col.exists.in.all.sources <- FALSE
+        }
+      }
+
+      if (col.exists.in.all.sources) {
+        validity.check <- paste0("New column <", newobj, "> successfully added to dataframe <", dataframe_name, "> in all sources.")
+      } else {
+        validity.check <- paste0("Warning: column <", newobj, "> not found in dataframe <", dataframe_name, "> in one or more sources. Check with ds.names().")
+      }
+    } else {
+      validity.check <- paste0("<", test.obj.name, "> appears valid in all sources.")
+    }
+
+    return(list(is.object.created = return.message,
+                validity.check = validity.check))
+
+  } else {
+    validity.check <- paste0("<", test.obj.name, "> invalid in at least one source. See studyside.messages:")
+
+    return(list(is.object.created = return.message,
+                validity.check = validity.check,
+                studyside.messages = studyside.message))
+  }
+
+  #############################################################################################################
+  # End of check
+  #############################################################################################################
+
+}
+#ds.scale
\ No newline at end of file
diff --git a/man/ds.date.Rd b/man/ds.date.Rd
new file mode 100644
index 00000000..b6f7de2e
--- /dev/null
+++ b/man/ds.date.Rd
@@ -0,0 +1,95 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ds.date.R
+\name{ds.date}
+\alias{ds.date}
+\title{Creates date objects using a server-side object}
+\usage{
+ds.date(
+  x = NULL,
+  type = c("extractdate", "makedate", "timebetween"),
+  unit = c("days", "months", "years"),
+  add.column = FALSE,
+  newobj = "date.result",
+  datasources = NULL
+)
+}
+\arguments{
+\item{x}{Character vector specifying the server-side object(s).
For data-frame columns, use the format \code{df$column}.} + +\item{type}{Character string specifying the operation: \code{"extractdate"}, \code{"makedate"}, or \code{"timebetween"}.} + +\item{unit}{Character string specifying the unit for \code{extractdate} or \code{timebetween}: \code{"days"}, \code{"months"}, or \code{"years"}.} + +\item{add.column}{Logical. If \code{FALSE}, the result is created as a new server-side object; +if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.} + +\item{newobj}{Character string for the name of the object that will be created on the server. Default is \code{"date.result"}.} + +\item{datasources}{A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +If the \code{datasources} argument is not specified the default set of connections will be used: +see \code{\link[DSI]{datashield.connections_default}}.} +} +\description{ +Generates objects using a server-side object, which can be either a vector or +a data-frame column. Supports three operations: +1. Extract components of a date (\code{extractdate}) +2. Combine numeric year, month, and day into a full date (\code{makedate}) +3. 
Compute the time interval between two dates (\code{timebetween}) +} +\details{ + +} +\examples{ + +\dontrun{ + + require('DSI') + require('DSOpal') + require('dsBaseClient') + + builder <- DSI::newDSLoginBuilder() + builder$append(server = "study1", + url = "https://opal-demo.obiba.org", + user = "dsuser", password = "P@ssw0rd", + table = "GWAS.ega_phenotypes_1", driver = "OpalDriver") + builder$append(server = "study2", + url = "https://opal-demo.obiba.org", + user = "dsuser", password = "P@ssw0rd", + table = "GWAS.ega_phenotypes_2", driver = "OpalDriver") + + logindata <- builder$build() + connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") + + ds.make(toAssign = "D$date_diagnosis", + newobj = 'diagnosis_date', datasources = connections) + ds.date(x="D$date_diagnosis", type = "extractdate", + newobj = "diag_month", unit = "months", add.column = TRUE) + ds.date(x="D$date_diagnosis", type = "extractdate", + newobj = "diag_day", unit = "days", add.column = TRUE) + + + # Example 1: Create a new object by extracting the year from an object + ds.date(x="diagnosis_date", type = "extractdate", + newobj = "diagnosis_year", unit = "years", add.column = FALSE) + + # Example 2: Create a new column by extracting year from an object. This will result in an error since + # creating a new column option requires a dataframe input. + ds.date(x="diagnosis_date", type = "extractdate", + newobj = "diagnosis_year", unit = "years", add.column = TRUE) + + # Example 3: Create a new date column by combining 3 objects: 2 columns and 1 vector. + ds.date(x=c("diagnosis_year", "D$diag_month", "D$diag_day"), type = "makedate", + newobj = "combined_date", add.column = TRUE) + + # Example 4: Create a new object by calculating time between one column and one object in months. 
+ ds.date(x=c("diagnosis_date", "D$date_death"), type = "timebetween", + newobj = "timebetween.months", unit = "months", add.column = FALSE) + + # Clear the Datashield R sessions and logout + datashield.logout(connections) +} + +} +\author{ +Zulal Bekerecioglu +} diff --git a/man/ds.predict.Rd b/man/ds.predict.Rd new file mode 100644 index 00000000..f330ab53 --- /dev/null +++ b/man/ds.predict.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ds.predict.R +\name{ds.predict} +\alias{ds.predict} +\title{ds.predict} +\usage{ +ds.predict( + name = NULL, + newdataname = NULL, + type = c("response", "link"), + newobj = "new.predictions", + traindataname = NULL, + na.action = "na.pass", + datasources = NULL +) +} +\arguments{ +\item{name}{The client-side return object from \code{ds.glm}.} + +\item{newdataname}{A character string specifying the name of the new dataset to be used for predictions.} + +\item{type}{A character string specifying the type of prediction. Options are \code{"response"} or \code{"link"}.} + +\item{newobj}{A character string specifying the name of the output object created on the server. +Default is \code{"new.predictions"}.} + +\item{traindataname}{A character string specifying the name of the dataset used for model training.} + +\item{na.action}{A character string to specify the action to take if missing values are present. Default is \code{"na.pass"}.} + +\item{datasources}{A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +If the \code{datasources} argument is not specified the default set of connections will be used: +see \code{\link[DSI]{datashield.connections_default}}.} +} +\description{ +Generates server-side predictions using the client-side output from \code{ds.glm}. 
+} +\details{ +This function takes the client-side output from \code{ds.glm} and sends the necessary components +(coefficients, family, formula, and any categorical variables) to the server for prediction. + +Server function called: \code{predictDS2} +} +\examples{ + +\dontrun{ + + require('DSI') + require('DSOpal') + require('dsBaseClient') + + builder <- DSI::newDSLoginBuilder() + builder$append(server = "study1", + url = "https://opal-demo.obiba.org", + user = "dsuser", password = "P@ssw0rd", + table = "CNSIM.CNSIM1", driver = "OpalDriver") + builder$append(server = "study2", + url = "https://opal-demo.obiba.org", + user = "dsuser", password = "P@ssw0rd", + table = "CNSIM.CNSIM2", driver = "OpalDriver") + builder$append(server = "study3", + url = "https://opal-demo.obiba.org", + user = "dsuser", password = "P@ssw0rd", + table = "CNSIM.CNSIM3", driver = "OpalDriver") + logindata <- builder$build() + + # Log onto the remote Opal training servers + connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") + + + # Example: Fit the model using ds.glm for study1 and study2 + fitted_model <- ds.glm(formula = "LAB_TSC ~ LAB_HDL + PM_BMI_CONTINUOUS * GENDER + MEDI_LPD", + data = "D", family = "gaussian", datasources = connections[c("study1", "study2"])) + + # Predictions for study3 + ds.predict(name = fitted_model, newdataname = "D", type = "response", + traindataname="D", na.action="na.omit", datasources = connections["study3"]) + + + # Clear the Datashield R sessions and logout + datashield.logout(connections) +} + +} +\author{ +Zulal Bekerecioglu +} diff --git a/man/ds.resPlot.Rd b/man/ds.resPlot.Rd new file mode 100644 index 00000000..a59d239a --- /dev/null +++ b/man/ds.resPlot.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ds.resPlot.R +\name{ds.resPlot} +\alias{ds.resPlot} +\title{ds.resplots} +\usage{ +ds.resPlot( + name, + traindataname, + which = 0, + pch = 1, + col = "black", + lty 
= 1, + datasources = NULL +) +} +\arguments{ +\item{name}{The client-side return object from \code{ds.glm}.} + +\item{traindataname}{The name of the dataset used to train the model.} + +\item{which}{A numeric value deciding what type of plot to return. +1 = residuals vs fitted plot, 2 = QQ plot, 0 = both (default).} + +\item{datasources}{A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +If the \code{datasources} argument is not specified the default set of connections will be used: +see \code{\link[DSI]{datashield.connections_default}}.} +} +\description{ +Creates residual plots from \code{ds.glm}, using \code{ds.predict} and \code{ds.scatterPlot}. +} +\details{ +This function generates residual plots (residuals vs fitted) and/or QQ-plots +for a model fitted with ds.glm, based on the `which` argument. +} +\examples{ + +\dontrun{ + + require('DSI') + require('DSOpal') + require('dsBaseClient') + + builder <- DSI::newDSLoginBuilder() + builder$append(server = "study1", url = "https://opal-demo.obiba.org", + user = "dsuser", password = "P@ssw0rd", + table = "CNSIM.CNSIM1", driver = "OpalDriver") + builder$append(server = "study2", url = "https://opal-demo.obiba.org", + user = "dsuser", password = "P@ssw0rd", + table = "CNSIM.CNSIM2", driver = "OpalDriver") + logindata <- builder$build() + + # Log onto the remote Opal training servers + connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") + + + # Example 1: Fit the model using ds.glm for study1 + fitted_model <- ds.glm(formula = "LAB_TSC ~ LAB_HDL + PM_BMI_CONTINUOUS * GENDER + MEDI_LPD", + data = "D", family = "gaussian", datasources = connections) + + # Residuals for the model + ds.resPlot(name = fitted_model, traindataname="D", datasources = connections) + + + # Clear the Datashield R sessions and logout + datashield.logout(connections) +} + +} +\author{ +Zulal Bekerecioglu +} diff --git a/man/ds.round.Rd b/man/ds.round.Rd new file mode 100644 index 
00000000..8afee7d9 --- /dev/null +++ b/man/ds.round.Rd @@ -0,0 +1,86 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ds.round.R +\name{ds.round} +\alias{ds.round} +\title{ds.round} +\usage{ +ds.round( + x = NULL, + type = c("round", "ceiling", "floor", "trunc", "signif"), + digits = 0, + add.column = FALSE, + newobj = "rounding.result", + datasources = NULL +) +} +\arguments{ +\item{x}{Character vector specifying the server-side object(s). For data-frame columns, use the format \code{df$column}.} + +\item{type}{Character string specifying the operation: \code{"round"}, \code{"ceiling"}, \code{"floor"}, +\code{trunc}, or \code{"signif"}.} + +\item{digits}{Number of digits to be used in arguments \code{"round"} and \code{"signif"}.} + +\item{add.column}{Logical. If \code{FALSE}, the result is created as a new server-side object; +if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.} + +\item{newobj}{Character string for the name of the object that will be created on the server. Default is \code{"rounding.result"}.} + +\item{datasources}{A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +If the \code{datasources} argument is not specified the default set of connections will be used: +see \code{\link[DSI]{datashield.connections_default}}.} +} +\description{ +Generates objects using a server-side object, which can be either a vector or +a data-frame column. Supports five operations: +1. (\code{round}) +2. (\code{ceiling}) +3. (\code{floor}) +4. (\code{trunc}) +5. (\code{signif}) +where each function in baseR is applied on the server side to the specified object. +} +\details{ +Note: \code{add.column = TRUE} is only valid for data-frame inputs. 
+
+Server function called: \code{roundDS}
+}
+\examples{
+
+\dontrun{
+
+  require('DSI')
+  require('DSOpal')
+  require('dsBaseClient')
+
+  builder <- DSI::newDSLoginBuilder()
+  builder$append(server = "study1",
+                 url = "https://opal-demo.obiba.org",
+                 user = "dsuser", password = "P@ssw0rd",
+                 table = "DASIM.DASIM1", driver = "OpalDriver")
+  builder$append(server = "study2",
+                 url = "https://opal-demo.obiba.org",
+                 user = "dsuser", password = "P@ssw0rd",
+                 table = "DASIM.DASIM2", driver = "OpalDriver")
+  logindata <- builder$build()
+
+  # Log onto the remote Opal training servers
+  connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+
+  ds.make(toAssign = "D$LAB_TSC", newobj = 'LAB.TSC.obj', datasources = connections)
+
+  # Example 1: Give a numeric object, save as a new object
+  ds.round("LAB.TSC.obj", digits=2, add.column = FALSE)
+
+  # Example 2: Give a column, save as a new column.
+  ds.round("D$LAB_HDL", type = "ceiling", newobj = "LAB_rounded_HDL", add.column = TRUE)
+
+
+  # Clear the Datashield R sessions and logout
+  datashield.logout(connections)
+}
+
+}
+\author{
+Zulal Bekerecioglu
+}
diff --git a/man/ds.scale.Rd b/man/ds.scale.Rd
new file mode 100644
index 00000000..cab5bb2e
--- /dev/null
+++ b/man/ds.scale.Rd
@@ -0,0 +1,72 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ds.scale.R
+\name{ds.scale}
+\alias{ds.scale}
+\title{Standardizes a server-side vector}
+\usage{
+ds.scale(
+  x = NULL,
+  newobj = "scaled.data",
+  add.column = FALSE,
+  datasources = NULL
+)
+}
+\arguments{
+\item{x}{A character string specifying the server-side vector. For data-frame columns, use the format \code{df$column}.}
+
+\item{newobj}{A character string for the name of the object that will be created on the server. Default is \code{"scaled.data"}.}
+
+\item{add.column}{Logical.
If \code{FALSE}, the result is created as a new server-side object; +if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.} + +\item{datasources}{A list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +If the \code{datasources} argument is not specified the default set of connections will be used: +see \code{\link[DSI]{datashield.connections_default}}.} +} +\description{ +Scales / standardizes a server-side vector using the scale function +} +\details{ +Note: \code{add.column = TRUE} is only valid for data-frame inputs. + +Server function called: \code{scaleDS} +} +\examples{ + +\dontrun{ + + require('DSI') + require('DSOpal') + require('dsBaseClient') + + builder <- DSI::newDSLoginBuilder() + builder$append(server = "study1", + url = "https://opal-demo.obiba.org", + user = "dsuser", password = "P@ssw0rd", + table = "GWAS.ega_phenotypes_1", driver = "OpalDriver") + builder$append(server = "study2", + url = "https://opal-demo.obiba.org", + user = "dsuser", password = "P@ssw0rd", + table = "GWAS.ega_phenotypes_2", driver = "OpalDriver") + logindata <- builder$build() + + # Log onto the remote Opal training servers + connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") + + ds.make(toAssign = "D$energy", newobj = 'energy.obj', datasources = connections) + + # Example 1: Give a column, save as a new column + ds.scale(x="D$age_recruitment", newobj="scaled.age.recruitment", add.column=TRUE) + + # Example 2: Give a numeric object, save as a new object + ds.scale(x="energy.obj", newobj="scaled.energy", add.column=FALSE) + + + # Clear the Datashield R sessions and logout + datashield.logout(connections) +} + +} +\author{ +Zulal Bekerecioglu +}