Skip to contents

[Experimental]

Can standardize by either 1) log()-transforming and then applying scale() (mean-centered and scaled by the standard deviation), or 2) if regressed_on variables are given, log()-transforming, running a linear regression to obtain the stats::residuals(), and finally scaling the residuals. Use regressed_on to try to remove the influence of potential confounding.

Usage

nc_standardize(data, cols = everything(), regressed_on = NULL)

Arguments

data

Data frame.

cols

Metabolic variables that will make up the network.

regressed_on

Optional. A character vector of variables to regress the metabolic variables on. Use this if you want to adjust the metabolic variables for variables that are known to influence them, e.g. sex or age. When given, the residuals from a linear regression model are calculated.

Value

Outputs a tibble object, with the original metabolic variables now standardized.

See also

See nc_estimate_links for more detailed examples, or see vignette("NetCoupler").

Examples


# Don't regress on any variable
simulated_data %>%
  nc_standardize(starts_with("metabolite_"))
#> # A tibble: 2,000 × 18
#>    metabolite_1 metabolite_2 metabolite_3 metabolite_4 metabolite_5 metabolite_6
#>           <dbl>        <dbl>        <dbl>        <dbl>        <dbl>        <dbl>
#>  1        0.318        0.105       1.30         NA           -0.181      -0.441 
#>  2       -1.06         0.622      -1.04         -0.113       -2.14       -0.774 
#>  3       -0.467        1.04        1.13          0.888        0.910      -0.280 
#>  4        1.16         0.162       0.115        -0.664        0.730      -0.202 
#>  5        0.336       -2.80        1.19          1.66         1.02        1.25  
#>  6        0.114        0.319      -0.0494       -1.34        -0.192       0.442 
#>  7        0.586        0.232       0.533         1.84         0.822      -1.58  
#>  8        0.959       -0.763      -0.375         0.523        1.29        1.24  
#>  9       -0.589       -1.01       -0.241        -1.28        -0.789      -0.0464
#> 10       -0.516       -0.575       0.484        -0.369       -0.432       1.48  
#> # ℹ 1,990 more rows
#> # ℹ 12 more variables: metabolite_7 <dbl>, metabolite_8 <dbl>,
#> #   metabolite_9 <dbl>, metabolite_10 <dbl>, outcome_continuous <dbl>,
#> #   metabolite_12 <dbl>, metabolite_11 <dbl>, exposure <dbl>, age <dbl>,
#> #   id <int>, outcome_event_time <dbl>, outcome_binary <int>

# Extract residuals by regressing on a variable
simulated_data %>%
  nc_standardize(starts_with("metabolite_"), "age")
#> # A tibble: 2,000 × 18
#>    metabolite_1 metabolite_2 metabolite_3 metabolite_4 metabolite_5 metabolite_6
#>           <dbl>        <dbl>        <dbl>        <dbl>        <dbl>        <dbl>
#>  1        0.296        0.121       1.28        NA            -0.198      -0.457 
#>  2       -1.07         0.626      -1.04        -0.0980       -2.16       -0.781 
#>  3       -0.483        1.05        1.11         0.920         0.902      -0.289 
#>  4        1.18         0.147       0.137       -0.714         0.745      -0.191 
#>  5        0.339       -2.80        1.19         1.66          1.02        1.25  
#>  6        0.108        0.322      -0.0530      -1.33         -0.197       0.440 
#>  7        0.584        0.234       0.532        1.84          0.822      -1.59  
#>  8        0.971       -0.770      -0.362        0.497         1.30        1.25  
#>  9       -0.616       -0.994      -0.264       -1.22         -0.809      -0.0623
#> 10       -0.528       -0.568       0.476       -0.346        -0.440       1.48  
#> # ℹ 1,990 more rows
#> # ℹ 12 more variables: metabolite_7 <dbl>, metabolite_8 <dbl>,
#> #   metabolite_9 <dbl>, metabolite_10 <dbl>, outcome_continuous <dbl>,
#> #   metabolite_12 <dbl>, metabolite_11 <dbl>, exposure <dbl>, age <dbl>,
#> #   id <int>, outcome_event_time <dbl>, outcome_binary <int>

# Works with factors too
simulated_data %>%
  dplyr::mutate(Sex = as.factor(sample(rep(c("F", "M"), times = nrow(.) / 2)))) %>%
  nc_standardize(starts_with("metabolite_"), c("age", "Sex"))
#> # A tibble: 2,000 × 19
#>    metabolite_1 metabolite_2 metabolite_3 metabolite_4 metabolite_5 metabolite_6
#>           <dbl>        <dbl>        <dbl>        <dbl>        <dbl>        <dbl>
#>  1        0.297        0.121       1.24         NA           -0.175      -0.450 
#>  2       -1.07         0.625      -1.08         -0.131       -2.13       -0.774 
#>  3       -0.483        1.05        1.15          0.955        0.878      -0.296 
#>  4        1.18         0.147       0.102        -0.749        0.770      -0.184 
#>  5        0.339       -2.80        1.16          1.62         1.05        1.26  
#>  6        0.108        0.322      -0.0195       -1.29        -0.221       0.433 
#>  7        0.584        0.234       0.566         1.88         0.799      -1.60  
#>  8        0.971       -0.770      -0.329         0.529        1.28        1.24  
#>  9       -0.615       -0.994      -0.296        -1.25        -0.787      -0.0558
#> 10       -0.528       -0.567       0.510        -0.312       -0.465       1.47  
#> # ℹ 1,990 more rows
#> # ℹ 13 more variables: metabolite_7 <dbl>, metabolite_8 <dbl>,
#> #   metabolite_9 <dbl>, metabolite_10 <dbl>, outcome_continuous <dbl>,
#> #   metabolite_12 <dbl>, metabolite_11 <dbl>, exposure <dbl>, age <dbl>,
#> #   id <int>, outcome_event_time <dbl>, outcome_binary <int>, Sex <fct>