Skip to contents

This vignette provides a runnable short test for the end-to-end model fit with simulation, fit, prediction/simulation and benchmark to standard models for a normal margin with normal copula over 3 timepoint observations of 20 subjects.

library(gamlss.longitudinal)
#> Loading required package: gamlss.dist
#> This is gamlss.longitudinal 0.1.0. For overview type 'help("gamlss.longitudinal-package")'.
#> Warning: gamlss.longitudinal is in active development; please report issues at https://github.com/ahibbert/gamlss.longitudinal/issues
library(gamlss.dist)

dat <- simulate_longitudinal_dataset(
  n = 20,
  times = 0:2,
  margin_dist = NO(),
  copula_dist = "N",
  margin_params = list(mu = function(d) 1 + 0.2 * d$time, sigma = 0.6),
  copula_params = list(theta = 0.25),
  seed = 1
)

fit <- gamlss_longitudinal(
  dataset = dat,
  margin_dist = NO(),
  copula_dist = "N",
  time_var = "time",
  subject_var = "subject",
  mu.formula = response ~ time,
  sigma.formula = ~ 1,
  theta.formula = ~ 1,
  max_outer_iter = 5,
  max_inner_iter = 5,
  compute_vcov = FALSE,
  verbose = 0
)

head(predict(fit, type = "mean"))
#> [1] 1.163827 1.302172 1.440517 1.163827 1.302172 1.440517
head(predict(fit, type = "quantile", probs = c(0.1, 0.5, 0.9)))
#>   subject time  response       q01      q05      q09
#> 1       1    0 0.6241277 0.5247457 1.163827 1.802909
#> 2       1    1 1.8953534 0.6630906 1.302172 1.941254
#> 3       1    2 1.4782588 0.8014356 1.440517 2.079599
#> 4      10    0 0.0760300 0.5247457 1.163827 1.802909
#> 5      10    1 1.6382078 0.6630906 1.302172 1.941254
#> 6      10    2 0.8904998 0.8014356 1.440517 2.079599
head(predict(fit, type = "probability", q = 1, direction = "above"))
#>   subject time  response q direction probability
#> 1       1    0 0.6241277 1     above   0.6287420
#> 2       1    1 1.8953534 1     above   0.7277249
#> 3       1    2 1.4782588 1     above   0.8114818
#> 4      10    0 0.0760300 1     above   0.6287420
#> 5      10    1 1.6382078 1     above   0.7277249
#> 6      10    2 0.8904998 1     above   0.8114818

check_model(fit, include_vcov = FALSE)
#> 
#> GAMLSS Longitudinal Model Check
#> ===============================
#> Margin: NO    Copula: N
#> LogLik: -43.22    Converged: yes
#> 
#> Basic Checks
#> ------------
#>  Area                 Status
#>  Convergence          PASS  
#>  Marginal fit         PASS  
#>  Tail fit             PASS  
#>  Copula fit           PASS  
#>  Variance calculation PASS  
#> 
#> Result: PASSED
#> Note: these are basic automated checks; broader model diagnostics should also be reviewed.
#> 
#> Scores
#> ------
#>    logs    mae    mse    dss
#>  0.7229 0.4135 0.2486 -0.392
#> 
#> PIT
#> ---
#>   n   mean     sd expected_sd ks_p_value
#>  60 0.4999 0.3006      0.2887     0.9632
#> 
#> Tail Calibration
#> ----------------
#>  threshold lower   upper expected lower_ratio upper_ratio
#>       0.05  0.05 0.03333     0.05           1      0.6667
#>       0.10  0.10 0.05000     0.10           1      0.5000
#> 
#> Residual Dependence
#> -------------------
#>  lag normal_score_cor n_pairs cutoff
#>    1         -0.02083      40   0.25
#> 
#> Use check$checks for thresholds and check$warnings for failed-check details.
reporting_table(fit, dat, by = "time", threshold = 1)
#>   time  n     mean       mu   median       q01      q05      q09 prob_above_1
#> 1    0 20 1.163827 1.163827 1.163827 0.5247457 1.163827 1.802909    0.6287420
#> 2    1 20 1.302172 1.302172 1.302172 0.6630906 1.302172 1.941254    0.7277249
#> 3    2 20 1.440517 1.440517 1.440517 0.8014356 1.440517 2.079599    0.8114818
model_spec(fit)
#> 
#> GAMLSS Longitudinal Model Specification
#> --------------------------------------
#> Response: response  | Subject: subject  | Time: time 
#> Margin: NO  | Copula: N 
#> Optimisation: RS  | Converged: yes 
#> Missing responses: 0 of 60 
#> VCOV: analytical 
#> 
#> Formulas
#>                mu             sigma                nu               tau 
#> "response ~ time"              "~1"         "\"~ 1\""         "\"~ 1\"" 
#>             theta              zeta 
#>              "~1"         "\"~ 1\"" 
#> 
#> Margin Links
#>  parameter     link
#>         mu identity
#>      sigma      log
#> 
#> Likelihood And Information Criteria
#>        marginal copula    joint
#> LogLik -43.3758 0.1538 -43.2220
#> AIC     92.7515 1.6924  94.4440
#> BIC     99.0346 3.7868 102.8214
#> EDF      3.0000 1.0000   4.0000
#> 
#> Dependence By Time (includes Kendall's tau)
#>  time n_obs theta_fit tau_fit
#>     0    20   0.09119 0.05813
#>     1    20   0.09119 0.05813
#>     2    20       NaN     NaN

sim <- simulate(fit, nsim = 2, seed = 1)
head(sim)
#>       sim_1    sim_2
#> 1 0.8514286 1.841371
#> 2 1.1116770 1.094359
#> 3 1.5143433 1.370522
#> 4 1.8269691 0.947745
#> 5 0.9476708 1.474986
#> 6 2.0400790 1.133748

benchmark_standard_models(
  data = dat,
  formula = response ~ time,
  subject_var = "subject",
  family = "gaussian",
  comparators = "glm",
  fit = fit
)
#> 
#> Standard Longitudinal Comparator Benchmark
#> -----------------------------------------
#> Formula: response ~ time 
#> Subject: subject 
#> Family: gaussian 
#> Methods included: gamlss.longitudinal, glm 
#> Estimand note: mean-fit metrics compare response-mean predictions; distributional metrics use fitted quantiles, densities, PIT values, intervals, and tail probabilities where available.
#> 
#> Benchmark Summary
#> -----------------
#> 
#> Distributional
#> --------------
#>                            Metric               Best model
#>             95% interval coverage gamlss.longitudinal, glm
#>                95% interval width gamlss.longitudinal, glm
#>                Negative log score gamlss.longitudinal, glm
#>                    PIT KS p-value                      glm
#>           PIT mean absolute error      gamlss.longitudinal
#>               90th percentile MAE                      glm
#>               Lower 5% tail error gamlss.longitudinal, glm
#>               Upper 5% tail error gamlss.longitudinal, glm
#>  90th percentile upper-tail error                      glm
#> 
#> Mean fit
#> --------
#>                  Metric               Best model
#>   Observed response MAE gamlss.longitudinal, glm
#>               Mean bias gamlss.longitudinal, glm
#>                Mean MAE gamlss.longitudinal, glm
#>               Mean RMSE gamlss.longitudinal, glm
#>  Observed response RMSE gamlss.longitudinal, glm
#> 
#> Runtime
#> -------
#>              Metric Best model
#>  Elapsed time (sec)        glm
#> 
#> Benchmark Details
#> -----------------
#> Benchmark Metrics
#> -----------------
#>                            Metric gamlss.longitudinal        glm
#>                Elapsed time (sec)                  NA   0.002168
#>            Number of observations           6.000e+01  60.000000
#>                    Log likelihood          -4.322e+01 -43.375310
#>                               AIC           9.444e+01  92.750620
#>                               MAE           4.135e-01   0.413443
#>                              RMSE           4.986e-01   0.498567
#>                         Mean RMSE           1.139e-01   0.115620
#>                          Mean MAE           1.022e-01   0.104125
#>                         Mean bias           1.022e-01   0.104125
#>            Observed response RMSE           4.986e-01   0.498567
#>             Observed response MAE           4.135e-01   0.413443
#>               90th percentile MAE           5.033e-02   0.047860
#>                Negative log score           7.229e-01   0.722992
#>  90th percentile upper-tail error          -8.287e-03  -0.005864
#>             95% interval coverage           9.500e-01   0.950000
#>                95% interval width           1.955e+00   1.970841
#>           PIT mean absolute error           8.176e-05   0.001167
#>                    PIT KS p-value           9.632e-01   0.975698
#>               Lower 5% tail error           0.000e+00   0.000000
#>               Upper 5% tail error          -1.667e-02  -0.016667
#> 
#> Mu Coefficients
#> ---------------
#> Coefficient Estimates
#> 
#>       term gamlss.longitudinal    glm
#>  intercept              1.1638 1.1657
#>       time              0.1383 0.1384
#> 
#> Standard Errors
#> 
#>       term gamlss.longitudinal     glm
#>  intercept             0.10414 0.10351
#>       time             0.07852 0.08018
#> 
#> 95% Lower Estimates
#> 
#>       term gamlss.longitudinal     glm
#>  intercept             0.95972  0.9628
#>       time            -0.01555 -0.0187
#> 
#> 95% Upper Estimates
#> 
#>       term gamlss.longitudinal    glm
#>  intercept              1.3679 1.3686
#>       time              0.2922 0.2956