Main function and parameters

Here are the parameters to run the TAD::launch_analysis_tad function, and how it is run. One file is provided for each output. Those outputs will be written by TAD::launch_analysis_tad and we will read the outputs in the next sections.

The randomisation number is very low, so the code executes fast.


weights <- TAD::AB[, 5:20]
weights_factor <- TAD::AB[, c("Year", "Plot", "Treatment", "Bloc")]
trait_data <- log(TAD::trait[["SLA"]][seq_len(ncol(weights))])
aggregation_factor_name <- c("Year", "Bloc")
statistics_factor_name <- c("Treatment")
regenerate_abundance_df <- TRUE
regenerate_weighted_moments_df <- TRUE
regenerate_stat_per_obs_df <- TRUE
regenerate_stat_per_rand_df <- TRUE
regenerate_stat_skr_df <- TRUE
randomization_number <- 20
seed <- 1312
significativity_threshold <- c(0.025, 0.975)
lin_mod <- "lm"
slope_distance <- 1
intercept_distance <- 1.86

produce_results <- function(
  abundance_file,
  weighted_moments_file,
  stat_per_obs_file,
  stat_per_rand_file,
  stat_skr_param_file
) {

  TAD::launch_analysis_tad(
    weights = weights,
    weights_factor = weights_factor,
    trait_data = trait_data,
    randomization_number = randomization_number,
    aggregation_factor_name = aggregation_factor_name,
    statistics_factor_name = statistics_factor_name,
    seed = seed,
    abundance_file = abundance_file,
    weighted_moments_file = weighted_moments_file,
    stat_per_obs_file = stat_per_obs_file,
    stat_per_rand_file = stat_per_rand_file,
    stat_skr_param_file = stat_skr_param_file,
    regenerate_abundance_df = regenerate_abundance_df,
    regenerate_weighted_moments_df = regenerate_weighted_moments_df,
    regenerate_stat_per_obs_df = regenerate_stat_per_obs_df,
    regenerate_stat_per_rand_df = regenerate_stat_per_rand_df,
    regenerate_stat_skr_df = regenerate_stat_skr_df,
    significativity_threshold = significativity_threshold,
    lin_mod = lin_mod,
    slope_distance = slope_distance,
    intercept_distance = intercept_distance
  )
}

Get CSV outputs


## We don't especially need multiprocessing
future::plan(future::sequential)

## We define the outputs.
csv_files <- c(
  abundance_csv_file <- "abundance_file.csv",
  weighted_moments_csv_file <- "weighted_moments_file.csv",
  stat_per_obs_csv_file <- "stat_per_obs_file.csv",
  stat_per_rand_csv_file <- "stat_per_rand_file.csv",
  stat_skr_param_csv_file <- "stat_skr_param_file.csv"
)

## We run the analysis and provide the paths to write the results to.
results <- produce_results(
  abundance_file = abundance_csv_file,
  weighted_moments_file = weighted_moments_csv_file,
  stat_per_obs_file = stat_per_obs_csv_file,
  stat_per_rand_file = stat_per_rand_csv_file,
  stat_skr_param_file = stat_skr_param_csv_file
)

## Let's see if all the files have been created: we should
##   see five CSV files here
list.files(pattern = "*.csv", full.names = TRUE)
#> [1] "./abundance_file.csv"        "./stat_per_obs_file.csv"    
#> [3] "./stat_per_rand_file.csv"    "./stat_skr_param_file.csv"  
#> [5] "./weighted_moments_file.csv"

Showing CSV outputs

abundance_from_csv <- TAD::load_abundance_dataframe(
  abundance_csv_file
)
head(abundance_from_csv[
  c(1:3),
  c(colnames(abundance_from_csv) %in% colnames(abundance_from_csv)[1:8])
])
#>   number    index1    index2 index3 index4    index5   index6 index7
#> 1      0 0.0000000 0.0000000      0      0  1.000000 1.000000      0
#> 2      0 0.8403361 0.8403361      0      0  1.680672 0.000000      0
#> 3      0 0.0000000 0.0000000      0      0 21.259843 1.574803      0

weighted_moments_from_csv <- TAD::load_weighted_moments(
  weighted_moments_csv_file,
  factor_names = c("Year", "Plot", "Bloc")
)
head(weighted_moments_from_csv, n = 3)
#>   Year Plot Treatment Bloc number     mean    variance   skewness kurtosis
#> 1 2010    6  Mown_NPK    1      0 3.312137 0.013064571  0.0000000  1.00000
#> 2 2010   13  Mown_NPK    1      0 3.183708 0.060678220 -0.8908442  2.61656
#> 3 2010   20  Mown_NPK    2      0 3.213602 0.003355466  3.4020691 12.57407
#>   distance_law
#> 1  -0.86000000
#> 2  -0.03704301
#> 3  -0.86000000

stat_per_obs_from_csv <- TAD::load_statistics_per_obs(
  stat_per_obs_csv_file,
  factor_names = c("Year", "Plot", "Bloc")
)
head(stat_per_obs_from_csv, n = 3)
#>   Year Plot Treatment Bloc standardized_observedmean
#> 1 2010    6  Mown_NPK    1                 0.3307537
#> 2 2010   13  Mown_NPK    1                -0.8739049
#> 3 2010   20  Mown_NPK    2                 0.1927875
#>   standardized_min_quantilemean standardized_max_quantilemean significancemean
#> 1                     -1.845560                      1.107442            FALSE
#> 2                     -1.306323                      2.034754            FALSE
#> 3                     -1.442063                      1.079019            FALSE
#>   standardized_observedvariance standardized_min_quantilevariance
#> 1                   -0.36071237                        -0.5709610
#> 2                   -0.01502695                        -2.0413751
#> 3                   -0.65298390                        -0.9586044
#>   standardized_max_quantilevariance significancevariance
#> 1                          2.078434                FALSE
#> 2                          1.384701                FALSE
#> 3                          1.782211                FALSE
#>   standardized_observedskewness standardized_min_quantileskewness
#> 1                     0.1093812                        -1.1187589
#> 2                     0.6625729                        -1.3435337
#> 3                     0.9746794                        -0.9746794
#>   standardized_max_quantileskewness significanceskewness
#> 1                         1.6253667                FALSE
#> 2                         2.1153354                FALSE
#> 3                         0.9746794                FALSE
#>   standardized_observedkurtosis standardized_min_quantilekurtosis
#> 1                    0.00000000                          0.000000
#> 2                    0.09646385                         -2.269381
#> 3                    0.27232206                         -1.173578
#>   standardized_max_quantilekurtosis significancekurtosis
#> 1                          1.779513                FALSE
#> 2                          1.118933                FALSE
#> 3                          2.360124                FALSE

stat_per_rand_from_csv <- TAD::load_statistics_per_random(
  stat_per_rand_csv_file,
  factor_names = c("Treatment")
)
head(stat_per_rand_from_csv, n = 3)
#>   number         Treatment    slope intercept   rsquare  tad_stab
#> 1      0          Mown_NPK 1.257924  1.358449 0.8494475 2.2072484
#> 2      0 Mown_Unfertilized 1.424814  1.329256 0.8825581 0.5975996
#> 3      1          Mown_NPK 1.078927  1.230682 0.9720407 1.1807309
#>   distance_to_family cv_distance_to_family
#> 1          2.4530737              300.8936
#> 2          0.7977601              169.9280
#> 3          1.3105775              266.3767

stat_skr_param_from_csv <- TAD::load_stat_skr_param(
  stat_skr_param_csv_file,
  character_names = c("Treatment")
)
#> Warning in load_tad_table(path = path, table_name = env_attr, sep = ",", :
#> Unknown attribute distance_to_family_ses in table stat_skr_param loaded from
#> /tmp/Rtmp9vugT9/Rbuild1e4af4c5d9836/TAD/vignettes/stat_skr_param_file.csv.
#> Warning in load_tad_table(path = path, table_name = env_attr, sep = ",", :
#> Unknown attribute cv_distance_to_family_ses in table stat_skr_param loaded from
#> /tmp/Rtmp9vugT9/Rbuild1e4af4c5d9836/TAD/vignettes/stat_skr_param_file.csv.
head(stat_skr_param_from_csv, n = 3)
#>   slope_ses slope_signi intercept_ses intercept_signi rsquare_ses rsquare_signi
#> 1  1.341678       FALSE    -0.9032159           FALSE   -1.849008          TRUE
#> 2  3.112774        TRUE    -1.0340836           FALSE   -1.062311         FALSE
#>   tad_stab_ses tad_stab_signi tad_eve_ses tad_eve_signi cv_tad_eve_ses
#> 1     1.163717          FALSE    1.432937         FALSE     0.05052686
#> 2    -1.074969          FALSE   -1.049399         FALSE    -1.25788001
#>   cv_tad_eve_signi         Treatment
#> 1            FALSE          Mown_NPK
#> 2            FALSE Mown_Unfertilized

warnings()

Get tsv outputs


## We don't want multiprocessing
future::plan(future::sequential)

## We define the outputs.
tsv_files <- c(
  abundance_tsv_file <- "abundance_file.tsv",
  weighted_moments_tsv_file <- "weighted_moments_file.tsv",
  stat_per_obs_tsv_file <- "stat_per_obs_file.tsv",
  stat_per_rand_tsv_file <- "stat_per_rand_file.tsv",
  stat_skr_param_tsv_file <- "stat_skr_param_file.tsv"
)

## We run the analysis and provide the paths to write the results to.
results <- produce_results(
  abundance_file = abundance_tsv_file,
  weighted_moments_file = weighted_moments_tsv_file,
  stat_per_obs_file = stat_per_obs_tsv_file,
  stat_per_rand_file = stat_per_rand_tsv_file,
  stat_skr_param_file = stat_skr_param_tsv_file
)

## Let's see if all the files have been created: we should
##   see five tsv files here
list.files(pattern = "*.tsv", full.names = TRUE)
#> [1] "./abundance_file.tsv"        "./stat_per_obs_file.tsv"    
#> [3] "./stat_per_rand_file.tsv"    "./stat_skr_param_file.tsv"  
#> [5] "./weighted_moments_file.tsv"

Showing tsv outputs

abundance_from_tsv <- TAD::load_abundance_dataframe(
  abundance_tsv_file
)
head(abundance_from_tsv[
  c(1:3),
  c(colnames(abundance_from_tsv) %in% colnames(abundance_from_tsv)[1:8])
])
#>   number    index1    index2 index3 index4    index5   index6 index7
#> 1      0 0.0000000 0.0000000      0      0  1.000000 1.000000      0
#> 2      0 0.8403361 0.8403361      0      0  1.680672 0.000000      0
#> 3      0 0.0000000 0.0000000      0      0 21.259843 1.574803      0

weighted_moments_from_tsv <- TAD::load_weighted_moments(
  weighted_moments_tsv_file,
  factor_names = c("Year", "Plot", "Bloc")
)
head(weighted_moments_from_tsv, n = 3)
#>   Year Plot Treatment Bloc number     mean    variance   skewness kurtosis
#> 1 2010    6  Mown_NPK    1      0 3.312137 0.013064571  0.0000000  1.00000
#> 2 2010   13  Mown_NPK    1      0 3.183708 0.060678220 -0.8908442  2.61656
#> 3 2010   20  Mown_NPK    2      0 3.213602 0.003355466  3.4020691 12.57407
#>   distance_law
#> 1  -0.86000000
#> 2  -0.03704301
#> 3  -0.86000000

stat_per_obs_from_tsv <- TAD::load_statistics_per_obs(
  stat_per_obs_tsv_file,
  factor_names = c("Year", "Plot", "Bloc")
)
head(stat_per_obs_from_tsv, n = 3)
#>   Year Plot Treatment Bloc standardized_observedmean
#> 1 2010    6  Mown_NPK    1                 0.3307537
#> 2 2010   13  Mown_NPK    1                -0.8739049
#> 3 2010   20  Mown_NPK    2                 0.1927875
#>   standardized_min_quantilemean standardized_max_quantilemean significancemean
#> 1                     -1.845560                      1.107442            FALSE
#> 2                     -1.306323                      2.034754            FALSE
#> 3                     -1.442063                      1.079019            FALSE
#>   standardized_observedvariance standardized_min_quantilevariance
#> 1                   -0.36071237                        -0.5709610
#> 2                   -0.01502695                        -2.0413751
#> 3                   -0.65298390                        -0.9586044
#>   standardized_max_quantilevariance significancevariance
#> 1                          2.078434                FALSE
#> 2                          1.384701                FALSE
#> 3                          1.782211                FALSE
#>   standardized_observedskewness standardized_min_quantileskewness
#> 1                     0.1093812                        -1.1187589
#> 2                     0.6625729                        -1.3435337
#> 3                     0.9746794                        -0.9746794
#>   standardized_max_quantileskewness significanceskewness
#> 1                         1.6253667                FALSE
#> 2                         2.1153354                FALSE
#> 3                         0.9746794                FALSE
#>   standardized_observedkurtosis standardized_min_quantilekurtosis
#> 1                    0.00000000                          0.000000
#> 2                    0.09646385                         -2.269381
#> 3                    0.27232206                         -1.173578
#>   standardized_max_quantilekurtosis significancekurtosis
#> 1                          1.779513                FALSE
#> 2                          1.118933                FALSE
#> 3                          2.360124                FALSE

stat_per_rand_from_tsv <- TAD::load_statistics_per_random(
  stat_per_rand_tsv_file,
  factor_names = c("Treatment")
)
head(stat_per_rand_from_tsv, n = 3)
#>   number         Treatment    slope intercept   rsquare  tad_stab
#> 1      0          Mown_NPK 1.257924  1.358449 0.8494475 2.2072484
#> 2      0 Mown_Unfertilized 1.424814  1.329256 0.8825581 0.5975996
#> 3      1          Mown_NPK 1.078927  1.230682 0.9720407 1.1807309
#>   distance_to_family cv_distance_to_family
#> 1          2.4530737              300.8936
#> 2          0.7977601              169.9280
#> 3          1.3105775              266.3767

stat_skr_param_from_tsv <- TAD::load_stat_skr_param(
  stat_skr_param_tsv_file,
  character_names = c("Treatment")
)
#> Warning in load_tad_table(path = path, table_name = env_attr, sep = "\t", :
#> Unknown attribute distance_to_family_ses in table stat_skr_param loaded from
#> /tmp/Rtmp9vugT9/Rbuild1e4af4c5d9836/TAD/vignettes/stat_skr_param_file.tsv.
#> Warning in load_tad_table(path = path, table_name = env_attr, sep = "\t", :
#> Unknown attribute cv_distance_to_family_ses in table stat_skr_param loaded from
#> /tmp/Rtmp9vugT9/Rbuild1e4af4c5d9836/TAD/vignettes/stat_skr_param_file.tsv.
head(stat_skr_param_from_tsv, n = 3)
#>   slope_ses slope_signi intercept_ses intercept_signi rsquare_ses rsquare_signi
#> 1  1.341678       FALSE    -0.9032159           FALSE   -1.849008          TRUE
#> 2  3.112774        TRUE    -1.0340836           FALSE   -1.062311         FALSE
#>   tad_stab_ses tad_stab_signi tad_eve_ses tad_eve_signi cv_tad_eve_ses
#> 1     1.163717          FALSE    1.432937         FALSE     0.05052686
#> 2    -1.074969          FALSE   -1.049399         FALSE    -1.25788001
#>   cv_tad_eve_signi         Treatment
#> 1            FALSE          Mown_NPK
#> 2            FALSE Mown_Unfertilized

TSV and CSV should be identical

print(identical(abundance_from_tsv, abundance_from_csv))
#> [1] TRUE
print(identical(weighted_moments_from_tsv, weighted_moments_from_csv))
#> [1] TRUE
print(identical(stat_per_obs_from_tsv, stat_per_obs_from_csv))
#> [1] TRUE
print(identical(stat_per_rand_from_tsv, stat_per_rand_from_csv))
#> [1] TRUE
print(identical(stat_skr_param_from_tsv, stat_skr_param_from_csv))
#> [1] TRUE

Get rda outputs


## We don't want multiprocessing
future::plan(future::sequential)

## We define the outputs.
rda_files <- c(
  abundance_rda_file <- "abundance_file.rda",
  weighted_moments_rda_file <- "weighted_moments_file.rda",
  stat_per_obs_rda_file <- "stat_per_obs_file.rda",
  stat_per_rand_rda_file <- "stat_per_rand_file.rda",
  stat_skr_param_rda_file <- "stat_skr_param_file.rda"
)

## We run the analysis and provide the paths to write the results to.
results <- produce_results(
  abundance_file = abundance_rda_file,
  weighted_moments_file = weighted_moments_rda_file,
  stat_per_obs_file = stat_per_obs_rda_file,
  stat_per_rand_file = stat_per_rand_rda_file,
  stat_skr_param_file = stat_skr_param_rda_file
)

## Let's see if all the files have been created: we should
##   see five rda files here
list.files(pattern = "*.rda", full.names = TRUE)
#> [1] "./abundance_file.rda"        "./stat_per_obs_file.rda"    
#> [3] "./stat_per_rand_file.rda"    "./stat_skr_param_file.rda"  
#> [5] "./weighted_moments_file.rda"

Showing rda outputs

abundance_from_rda <- TAD::load_abundance_dataframe(
  abundance_rda_file
)
head(abundance_from_rda[
  c(1:3),
  c(colnames(abundance_from_rda) %in% colnames(abundance_from_rda)[1:8])
])
#>   number    index1    index2 index3 index4    index5   index6 index7
#> 1      0 0.0000000 0.0000000      0      0  1.000000 1.000000      0
#> 2      0 0.8403361 0.8403361      0      0  1.680672 0.000000      0
#> 3      0 0.0000000 0.0000000      0      0 21.259843 1.574803      0

weighted_moments_from_rda <- TAD::load_weighted_moments(
  weighted_moments_rda_file
)
head(weighted_moments_from_rda, n = 3)
#>   Year Plot Treatment Bloc number     mean    variance   skewness kurtosis
#> 1 2010    6  Mown_NPK    1      0 3.312137 0.013064571  0.0000000  1.00000
#> 2 2010   13  Mown_NPK    1      0 3.183708 0.060678220 -0.8908442  2.61656
#> 3 2010   20  Mown_NPK    2      0 3.213602 0.003355466  3.4020691 12.57407
#>   distance_law
#> 1  -0.86000000
#> 2  -0.03704301
#> 3  -0.86000000

stat_per_obs_from_rda <- TAD::load_statistics_per_obs(
  stat_per_obs_rda_file
)
head(stat_per_obs_from_rda, n = 3)
#>   Year Plot Treatment Bloc standardized_observedmean
#> 1 2010    6  Mown_NPK    1                 0.3307537
#> 2 2010   13  Mown_NPK    1                -0.8739049
#> 3 2010   20  Mown_NPK    2                 0.1927875
#>   standardized_min_quantilemean standardized_max_quantilemean significancemean
#> 1                     -1.845560                      1.107442            FALSE
#> 2                     -1.306323                      2.034754            FALSE
#> 3                     -1.442063                      1.079019            FALSE
#>   standardized_observedvariance standardized_min_quantilevariance
#> 1                   -0.36071237                        -0.5709610
#> 2                   -0.01502695                        -2.0413751
#> 3                   -0.65298390                        -0.9586044
#>   standardized_max_quantilevariance significancevariance
#> 1                          2.078434                FALSE
#> 2                          1.384701                FALSE
#> 3                          1.782211                FALSE
#>   standardized_observedskewness standardized_min_quantileskewness
#> 1                     0.1093812                        -1.1187589
#> 2                     0.6625729                        -1.3435337
#> 3                     0.9746794                        -0.9746794
#>   standardized_max_quantileskewness significanceskewness
#> 1                         1.6253667                FALSE
#> 2                         2.1153354                FALSE
#> 3                         0.9746794                FALSE
#>   standardized_observedkurtosis standardized_min_quantilekurtosis
#> 1                    0.00000000                          0.000000
#> 2                    0.09646385                         -2.269381
#> 3                    0.27232206                         -1.173578
#>   standardized_max_quantilekurtosis significancekurtosis
#> 1                          1.779513                FALSE
#> 2                          1.118933                FALSE
#> 3                          2.360124                FALSE

stat_per_rand_from_rda <- TAD::load_statistics_per_random(
  stat_per_rand_rda_file
)
head(stat_per_rand_from_rda, n = 3)
#>   number         Treatment    slope intercept   rsquare  tad_stab
#> 1      0          Mown_NPK 1.257924  1.358449 0.8494475 2.2072484
#> 2      0 Mown_Unfertilized 1.424814  1.329256 0.8825581 0.5975996
#> 3      1          Mown_NPK 1.078927  1.230682 0.9720407 1.1807309
#>   distance_to_family cv_distance_to_family
#> 1          2.4530737              300.8936
#> 2          0.7977601              169.9280
#> 3          1.3105775              266.3767

stat_skr_param_from_rda <- TAD::load_stat_skr_param(
  stat_skr_param_rda_file
)
head(stat_skr_param_from_rda, n = 3)
#>   slope_ses slope_signi intercept_ses intercept_signi rsquare_ses rsquare_signi
#> 1  1.341678       FALSE    -0.9032159           FALSE   -1.849008          TRUE
#> 2  3.112774        TRUE    -1.0340836           FALSE   -1.062311         FALSE
#>   tad_stab_ses tad_stab_signi tad_eve_ses tad_eve_signi cv_tad_eve_ses
#> 1     1.163717          FALSE    1.432937         FALSE     0.05052686
#> 2    -1.074969          FALSE   -1.049399         FALSE    -1.25788001
#>   cv_tad_eve_signi         Treatment
#> 1            FALSE          Mown_NPK
#> 2            FALSE Mown_Unfertilized

RDA and loaded CSV hold the same values

They are not absolutly identical because of floaing point imprecision inherantly due to floats representation in computers.

But, they are still equals +/- the computer’s imprecision (10e-16, usually)

print(all.equal(abundance_from_rda, abundance_from_csv))
#> [1] TRUE
print(all.equal(weighted_moments_from_rda, weighted_moments_from_csv))
#> [1] TRUE
print(all.equal(stat_per_obs_from_rda, stat_per_obs_from_csv))
#> [1] TRUE
print(all.equal(stat_per_rand_from_rda, stat_per_rand_from_csv))
#> [1] TRUE
print(all.equal(stat_skr_param_from_rda, stat_skr_param_from_csv))
#> [1] TRUE
print(all.equal(abundance_from_rda, abundance_from_csv))
#> [1] TRUE
print(all.equal(weighted_moments_from_rda, weighted_moments_from_csv))
#> [1] TRUE
print(all.equal(stat_per_obs_from_rda, stat_per_obs_from_csv))
#> [1] TRUE
print(all.equal(stat_per_rand_from_rda, stat_per_rand_from_csv))
#> [1] TRUE
print(all.equal(stat_skr_param_from_rda, stat_skr_param_from_csv))
#> [1] TRUE

Get outputs in different formats